diff --git a/integrations/google_ai/example_assets/robot1.jpg b/integrations/google_ai/example_assets/robot1.jpg new file mode 100644 index 000000000..a3962db1b Binary files /dev/null and b/integrations/google_ai/example_assets/robot1.jpg differ diff --git a/integrations/google_ai/example_assets/robot2.jpg b/integrations/google_ai/example_assets/robot2.jpg new file mode 100644 index 000000000..2293f7b7c Binary files /dev/null and b/integrations/google_ai/example_assets/robot2.jpg differ diff --git a/integrations/google_ai/example_assets/robot3.jpg b/integrations/google_ai/example_assets/robot3.jpg new file mode 100644 index 000000000..581c1eb4f Binary files /dev/null and b/integrations/google_ai/example_assets/robot3.jpg differ diff --git a/integrations/google_ai/example_assets/robot4.jpg b/integrations/google_ai/example_assets/robot4.jpg new file mode 100644 index 000000000..5812f0898 Binary files /dev/null and b/integrations/google_ai/example_assets/robot4.jpg differ diff --git a/integrations/google_ai/src/google_ai_haystack/generators/chat/gemini.py b/integrations/google_ai/src/google_ai_haystack/generators/chat/gemini.py index 1f5557e0d..9bf33d8d3 100644 --- a/integrations/google_ai/src/google_ai_haystack/generators/chat/gemini.py +++ b/integrations/google_ai/src/google_ai_haystack/generators/chat/gemini.py @@ -15,6 +15,83 @@ @component class GoogleAIGeminiChatGenerator: + """ + GoogleAIGeminiChatGenerator is a multimodal chat generator supporting Gemini via Google Makersuite. 
+ + Sample usage: + ```python + from haystack.dataclasses.chat_message import ChatMessage + from google_ai_haystack.generators.chat.gemini import GoogleAIGeminiChatGenerator + + + gemini_chat = GoogleAIGeminiChatGenerator(model="gemini-pro", api_key="") + + messages = [ChatMessage.from_user("What is the most interesting thing you know?")] + res = gemini_chat.run(messages=messages) + for reply in res["replies"]: + print(reply.content) + + messages += res["replies"] + [ChatMessage.from_user("Tell me more about it")] + res = gemini_chat.run(messages=messages) + for reply in res["replies"]: + print(reply.content) + ``` + + + This is a more advanced usage that also uses function calls: + ```python + from haystack.dataclasses.chat_message import ChatMessage + from google.ai.generativelanguage import FunctionDeclaration, Tool + + from google_ai_haystack.generators.chat.gemini import GoogleAIGeminiChatGenerator + + # Example function to get the current weather + def get_current_weather(location: str, unit: str = "celsius") -> str: + # Call a weather API and return some text + ... + + # Define the function interface so that Gemini can call it + get_current_weather_func = FunctionDeclaration( + name="get_current_weather", + description="Get the current weather in a given location", + parameters={ + "type": "object", + "properties": { + "location": {"type": "string", "description": "The city and state, e.g. 
San Francisco, CA"}, + "unit": { + "type": "string", + "enum": [ + "celsius", + "fahrenheit", + ], + }, + }, + "required": ["location"], + }, + ) + tool = Tool([get_current_weather_func]) + + messages = [ChatMessage.from_user("What is the most interesting thing you know?")] + + gemini_chat = GoogleAIGeminiChatGenerator(model="gemini-pro", api_key="", tools=[tool]) + + messages = [ChatMessage.from_user(content = "What is the temperature in celsius in Berlin?")] + res = gemini_chat.run(messages=messages) + + weather = get_current_weather(**res["replies"][0].content) + messages += res["replies"] + [ChatMessage.from_function(content=weather, name="get_current_weather")] + res = gemini_chat.run(messages=messages) + for reply in res["replies"]: + print(reply.content) + ``` + + Input: + - **messages** A list of ChatMessage objects. + + Output: + - **replies** A list of ChatMessage objects containing one or more replies from the model. + """ + def __init__( self, *, @@ -25,7 +102,31 @@ def __init__( tools: Optional[List[Tool]] = None, ): """ - Multi modal generator using Gemini model via Makersuite + Initialize a GoogleAIGeminiChatGenerator instance. + If `api_key` is `None` it will use the `GOOGLE_API_KEY` env variable for authentication. + + To get an API key, visit: https://makersuite.google.com + + It supports the following models: + * `gemini-pro` + * `gemini-pro-vision` + * `gemini-ultra` + + :param api_key: Google Makersuite API key, defaults to None + :param model: Name of the model to use, defaults to "gemini-pro-vision" + :param generation_config: The generation config to use, defaults to None. + Can either be a GenerationConfig object or a dictionary of parameters. + Accepted parameters are: + - temperature + - top_p + - top_k + - candidate_count + - max_output_tokens + - stop_sequences + :param safety_settings: The safety settings to use, defaults to None. + A dictionary of HarmCategory to HarmBlockThreshold. 
+ :param tools: The tools to use, defaults to None. + A list of Tool objects that can be used to modify the generation process. """ # Authenticate, if api_key is None it will use the GOOGLE_API_KEY env variable diff --git a/integrations/google_ai/src/google_ai_haystack/generators/gemini.py b/integrations/google_ai/src/google_ai_haystack/generators/gemini.py index d05d99c60..bd4ab5150 100644 --- a/integrations/google_ai/src/google_ai_haystack/generators/gemini.py +++ b/integrations/google_ai/src/google_ai_haystack/generators/gemini.py @@ -15,6 +15,54 @@ @component class GoogleAIGeminiGenerator: + """ + GoogleAIGeminiGenerator is a multimodal generator supporting Gemini via Google Makersuite. + + Sample usage: + ```python + from google_ai_haystack.generators.gemini import GoogleAIGeminiGenerator + + gemini = GoogleAIGeminiGenerator(model="gemini-pro", api_key="") + res = gemini.run(parts = ["What is the most interesting thing you know?"]) + for answer in res["answers"]: + print(answer) + ``` + + This is a more advanced usage that also uses text and images as input: + ```python + import requests + from haystack.dataclasses.byte_stream import ByteStream + from google_ai_haystack.generators.gemini import GoogleAIGeminiGenerator + + BASE_URL = ( + "https://raw.githubusercontent.com/deepset-ai/haystack-core-integrations" + "/main/integrations/google_ai/example_assets" + ) + + URLS = [ + f"{BASE_URL}/robot1.jpg", + f"{BASE_URL}/robot2.jpg", + f"{BASE_URL}/robot3.jpg", + f"{BASE_URL}/robot4.jpg" + ] + images = [ + ByteStream(data=requests.get(url).content, mime_type="image/jpeg") + for url in URLS + ] + + gemini = GoogleAIGeminiGenerator(model="gemini-pro-vision", api_key="") + result = gemini.run(parts = ["What can you tell me about these robots?", *images]) + for answer in result["answers"]: + print(answer) + ``` + + Input: + - **parts** A heterogeneous list of strings, ByteStream or Part objects. 
+ + Output: + - **answers** A list of strings or dictionaries with function calls. + """ + def __init__( self, *, @@ -25,9 +73,32 @@ def __init__( tools: Optional[List[Tool]] = None, ): """ - Multi modal generator using Gemini model via Makersuite - """ + Initialize a GoogleAIGeminiGenerator instance. + If `api_key` is `None` it will use the `GOOGLE_API_KEY` env variable for authentication. + To get an API key, visit: https://makersuite.google.com + + It supports the following models: + * `gemini-pro` + * `gemini-pro-vision` + * `gemini-ultra` + + :param api_key: Google Makersuite API key, defaults to None + :param model: Name of the model to use, defaults to "gemini-pro-vision" + :param generation_config: The generation config to use, defaults to None. + Can either be a GenerationConfig object or a dictionary of parameters. + Accepted parameters are: + - temperature + - top_p + - top_k + - candidate_count + - max_output_tokens + - stop_sequences + :param safety_settings: The safety settings to use, defaults to None. + A dictionary of HarmCategory to HarmBlockThreshold. + :param tools: The tools to use, defaults to None. + A list of Tool objects that can be used to modify the generation process. + """ # Authenticate, if api_key is None it will use the GOOGLE_API_KEY env variable genai.configure(api_key=api_key)