From 16de49fa086547351704c16989fa1bd2d2efb363 Mon Sep 17 00:00:00 2001 From: VinciGit00 Date: Thu, 2 May 2024 13:47:17 +0200 Subject: [PATCH 1/4] add integration for bedrock Co-Authored-By: redrusty2 <15157208+redrusty2@users.noreply.github.com> --- pyproject.toml | 2 ++ scrapegraphai/graphs/abstract_graph.py | 22 +++++++++++++++++++--- scrapegraphai/helpers/models_tokens.py | 17 +++++++++++++++++ scrapegraphai/models/__init__.py | 1 + scrapegraphai/models/bedrock.py | 19 +++++++++++++++++++ scrapegraphai/nodes/rag_node.py | 12 +++++++++--- 6 files changed, 67 insertions(+), 6 deletions(-) create mode 100644 scrapegraphai/models/bedrock.py diff --git a/pyproject.toml b/pyproject.toml index bed10980..5a6b6e12 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -40,6 +40,8 @@ minify-html = "0.15.0" free-proxy = "1.1.1" langchain-groq = "0.1.3" playwright = "^1.43.0" +langchain-aws = "^0.1.2" + [tool.poetry.dev-dependencies] pytest = "8.0.0" diff --git a/scrapegraphai/graphs/abstract_graph.py b/scrapegraphai/graphs/abstract_graph.py index 5adf8ba6..12c3c39a 100644 --- a/scrapegraphai/graphs/abstract_graph.py +++ b/scrapegraphai/graphs/abstract_graph.py @@ -3,7 +3,8 @@ """ from abc import ABC, abstractmethod from typing import Optional -from ..models import OpenAI, Gemini, Ollama, AzureOpenAI, HuggingFace, Groq + +from ..models import OpenAI, Gemini, Ollama, AzureOpenAI, HuggingFace, Groq, Bedrock from ..helpers import models_tokens @@ -25,7 +26,8 @@ def __init__(self, prompt: str, config: dict, source: Optional[str] = None): # Set common configuration parameters self.verbose = True if config is None else config.get("verbose", False) - self.headless = True if config is None else config.get("headless", True) + self.headless = True if config is None else config.get( + "headless", True) # Create the graph self.graph = self._create_graph() @@ -92,12 +94,26 @@ def _create_llm(self, llm_config: dict): return HuggingFace(llm_params) elif "groq" in llm_params["model"]: 
llm_params["model"] = llm_params["model"].split("/")[-1] - + try: self.model_token = models_tokens["groq"][llm_params["model"]] except KeyError: raise KeyError("Model not supported") return Groq(llm_params) + elif "bedrock" in llm_params["model"]: + llm_params["model"] = llm_params["model"].split("/")[-1] + model_id = llm_params["model"] + + try: + self.model_token = models_tokens["bedrock"][llm_params["model"]] + except KeyError: + raise KeyError("Model not supported") + return Bedrock({ + "model_id": model_id, + "model_kwargs": { + "temperature": llm_params["temperature"], + } + }) else: raise ValueError( "Model provided by the configuration not supported") diff --git a/scrapegraphai/helpers/models_tokens.py b/scrapegraphai/helpers/models_tokens.py index 6b9ed637..28d1af14 100644 --- a/scrapegraphai/helpers/models_tokens.py +++ b/scrapegraphai/helpers/models_tokens.py @@ -43,5 +43,22 @@ "claude2": 9000, "claude2.1": 200000, "claude3": 200000 + }, + "bedrock": { + "anthropic.claude-3-haiku-20240307-v1:0": 200000, + "anthropic.claude-3-sonnet-20240229-v1:0": 200000, + "anthropic.claude-3-opus-20240229-v1:0": 200000, + "anthropic.claude-v2:1": 200000, + "anthropic.claude-v2": 100000, + "anthropic.claude-instant-v1": 100000, + "meta.llama3-8b-instruct-v1:0": 8192, + "meta.llama3-70b-instruct-v1:0": 8192, + "meta.llama2-13b-chat-v1": 4096, + "meta.llama2-70b-chat-v1": 4096, + "mistral.mistral-7b-instruct-v0:2": 32768, + "mistral.mixtral-8x7b-instruct-v0:1": 32768, + "mistral.mistral-large-2402-v1:0": 32768, + "cohere.embed-english-v3": 512, + "cohere.embed-multilingual-v3": 512 } } diff --git a/scrapegraphai/models/__init__.py b/scrapegraphai/models/__init__.py index b81e376f..19751e4c 100644 --- a/scrapegraphai/models/__init__.py +++ b/scrapegraphai/models/__init__.py @@ -10,3 +10,4 @@ from .ollama import Ollama from .hugging_face import HuggingFace from .groq import Groq +from .bedrock import Bedrock diff --git a/scrapegraphai/models/bedrock.py 
b/scrapegraphai/models/bedrock.py new file mode 100644 index 00000000..b7cbe288 --- /dev/null +++ b/scrapegraphai/models/bedrock.py @@ -0,0 +1,19 @@ +""" +Wrapper around the langchain_aws ChatBedrock chat model +""" +from langchain_aws import ChatBedrock + + +class Bedrock(ChatBedrock): + """ChatBedrock subclass that is constructed from a single config dict""" + + def __init__(self, llm_config: dict): + """ + Forward every entry of llm_config to the ChatBedrock constructor as a + keyword argument; this wrapper adds no defaults of its own. + + Args: + llm_config (dict): Keyword arguments passed unchanged to ChatBedrock. + """ + # Unpack the dict so each key becomes a ChatBedrock keyword argument + super().__init__(**llm_config) diff --git a/scrapegraphai/nodes/rag_node.py b/scrapegraphai/nodes/rag_node.py index d10f50c6..64221743 100644 --- a/scrapegraphai/nodes/rag_node.py +++ b/scrapegraphai/nodes/rag_node.py @@ -6,12 +6,14 @@ from langchain.docstore.document import Document from langchain.retrievers import ContextualCompressionRetriever from langchain.retrievers.document_compressors import EmbeddingsFilter, DocumentCompressorPipeline +from langchain_aws.embeddings.bedrock import BedrockEmbeddings from langchain_community.document_transformers import EmbeddingsRedundantFilter from langchain_community.embeddings import HuggingFaceHubEmbeddings from langchain_community.vectorstores import FAISS from langchain_community.embeddings import OllamaEmbeddings from langchain_openai import OpenAIEmbeddings, AzureOpenAIEmbeddings -from ..models import OpenAI, Ollama, AzureOpenAI, HuggingFace + +from ..models import OpenAI, Ollama, AzureOpenAI, HuggingFace, Bedrock from .base_node import BaseNode @@ -42,7 +44,8 @@ def __init__(self, input: str, output: List[str], node_config: dict, node_name: super().__init__(node_name, "node", input, output, 2, node_config) self.llm_model = node_config["llm"] self.embedder_model = node_config.get("embedder_model", None) - self.verbose = True if node_config is None else node_config.get("verbose",
False) + self.verbose = True if node_config is None else node_config.get( + "verbose", False) def execute(self, state): """ @@ -82,7 +85,7 @@ def execute(self, state): }, ) chunked_docs.append(doc) - + if self.verbose: print("--- (updated chunks metadata) ---") @@ -104,6 +107,9 @@ def execute(self, state): embeddings = OllamaEmbeddings(**params) elif isinstance(embedding_model, HuggingFace): embeddings = HuggingFaceHubEmbeddings(model=embedding_model.model) + elif isinstance(embedding_model, Bedrock): + embeddings = BedrockEmbeddings( + client=None, model_id=embedding_model.model_id) else: raise ValueError("Embedding Model missing or not supported") From 1afa31910d25b2735abe0ad09dad433d6c2159fb Mon Sep 17 00:00:00 2001 From: VinciGit00 Date: Thu, 2 May 2024 16:33:51 +0200 Subject: [PATCH 2/4] fix: add to requirements.txt langchain-aws = "^0.1.2" --- requirements.txt | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 02aadac4..30e98b3d 100644 --- a/requirements.txt +++ b/requirements.txt @@ -13,4 +13,5 @@ google==3.0.0 minify-html==0.15.0 free-proxy==1.1.1 langchain-groq==0.1.3 -playwright==1.43.0 \ No newline at end of file +playwright==1.43.0 +langchain-aws = "^0.1.2" From db419058132456542e8cd9100918ede44b30041c Mon Sep 17 00:00:00 2001 From: VinciGit00 Date: Thu, 2 May 2024 17:57:04 +0200 Subject: [PATCH 3/4] Update requirements.txt --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 30e98b3d..b7c642d1 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,4 +14,4 @@ minify-html==0.15.0 free-proxy==1.1.1 langchain-groq==0.1.3 playwright==1.43.0 -langchain-aws = "^0.1.2" +langchain-aws==0.1.2 From 75a4042a232a5b69fd38d1666fea9633b4fd015e Mon Sep 17 00:00:00 2001 From: semantic-release-bot Date: Thu, 2 May 2024 15:58:40 +0000 Subject: [PATCH 4/4] ci(release): 0.6.1-beta.1 [skip ci] ## 
[0.6.1-beta.1](https://github.com/VinciGit00/Scrapegraph-ai/compare/v0.6.0...v0.6.1-beta.1) (2024-05-02) ### Bug Fixes * add to requirements.txt langchain-aws = "^0.1.2" ([1afa319](https://github.com/VinciGit00/Scrapegraph-ai/commit/1afa31910d25b2735abe0ad09dad433d6c2159fb)) --- CHANGELOG.md | 7 +++++++ pyproject.toml | 2 +- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 87860fbb..dd613838 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,3 +1,10 @@ +## [0.6.1-beta.1](https://github.com/VinciGit00/Scrapegraph-ai/compare/v0.6.0...v0.6.1-beta.1) (2024-05-02) + + +### Bug Fixes + +* add to requirements.txt langchain-aws = "^0.1.2" ([1afa319](https://github.com/VinciGit00/Scrapegraph-ai/commit/1afa31910d25b2735abe0ad09dad433d6c2159fb)) + ## [0.6.0](https://github.com/VinciGit00/Scrapegraph-ai/compare/v0.5.2...v0.6.0) (2024-05-02) diff --git a/pyproject.toml b/pyproject.toml index f3503dce..b6f39b23 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [tool.poetry] name = "scrapegraphai" -version = "0.6.0" +version = "0.6.1b1" description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines."