From 16de49fa086547351704c16989fa1bd2d2efb363 Mon Sep 17 00:00:00 2001
From: VinciGit00 <mvincig11@gmail.com>
Date: Thu, 2 May 2024 13:47:17 +0200
Subject: [PATCH 1/4] add integration for bedrock

Co-Authored-By: redrusty2 <15157208+redrusty2@users.noreply.github.com>
---
 pyproject.toml                         |  2 ++
 scrapegraphai/graphs/abstract_graph.py | 22 +++++++++++++++++++---
 scrapegraphai/helpers/models_tokens.py | 17 +++++++++++++++++
 scrapegraphai/models/__init__.py       |  1 +
 scrapegraphai/models/bedrock.py        | 19 +++++++++++++++++++
 scrapegraphai/nodes/rag_node.py        | 12 +++++++++---
 6 files changed, 67 insertions(+), 6 deletions(-)
 create mode 100644 scrapegraphai/models/bedrock.py

diff --git a/pyproject.toml b/pyproject.toml
index bed10980..5a6b6e12 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -40,6 +40,8 @@ minify-html = "0.15.0"
 free-proxy = "1.1.1"
 langchain-groq = "0.1.3"
 playwright = "^1.43.0"
+langchain-aws = "^0.1.2"
+
 
 [tool.poetry.dev-dependencies]
 pytest = "8.0.0"
diff --git a/scrapegraphai/graphs/abstract_graph.py b/scrapegraphai/graphs/abstract_graph.py
index 5adf8ba6..12c3c39a 100644
--- a/scrapegraphai/graphs/abstract_graph.py
+++ b/scrapegraphai/graphs/abstract_graph.py
@@ -3,7 +3,8 @@
 """
 from abc import ABC, abstractmethod
 from typing import Optional
-from ..models import OpenAI, Gemini, Ollama, AzureOpenAI, HuggingFace, Groq
+
+from ..models import OpenAI, Gemini, Ollama, AzureOpenAI, HuggingFace, Groq, Bedrock
 from ..helpers import models_tokens
 
 
@@ -25,7 +26,8 @@ def __init__(self, prompt: str, config: dict, source: Optional[str] = None):
 
         # Set common configuration parameters
         self.verbose = True if config is None else config.get("verbose", False)
-        self.headless = True if config is None else config.get("headless", True)
+        self.headless = True if config is None else config.get(
+            "headless", True)
 
         # Create the graph
         self.graph = self._create_graph()
@@ -92,12 +94,26 @@ def _create_llm(self, llm_config: dict):
             return HuggingFace(llm_params)
         elif "groq" in llm_params["model"]:
             llm_params["model"] = llm_params["model"].split("/")[-1]
-            
+
             try:
                 self.model_token = models_tokens["groq"][llm_params["model"]]
             except KeyError:
                 raise KeyError("Model not supported")
             return Groq(llm_params)
+        elif "bedrock" in llm_params["model"]:
+            llm_params["model"] = llm_params["model"].split("/")[-1]
+            model_id = llm_params["model"]
+
+            try:
+                self.model_token = models_tokens["bedrock"][llm_params["model"]]
+            except KeyError:
+                raise KeyError("Model not supported")
+            return Bedrock({
+                "model_id": model_id,
+                "model_kwargs": {
+                    "temperature": llm_params["temperature"],
+                }
+            })
         else:
             raise ValueError(
                 "Model provided by the configuration not supported")
diff --git a/scrapegraphai/helpers/models_tokens.py b/scrapegraphai/helpers/models_tokens.py
index 6b9ed637..28d1af14 100644
--- a/scrapegraphai/helpers/models_tokens.py
+++ b/scrapegraphai/helpers/models_tokens.py
@@ -43,5 +43,22 @@
         "claude2": 9000,
         "claude2.1": 200000,
         "claude3": 200000
+    },
+    "bedrock": {
+        "anthropic.claude-3-haiku-20240307-v1:0": 200000,
+        "anthropic.claude-3-sonnet-20240229-v1:0": 200000,
+        "anthropic.claude-3-opus-20240229-v1:0": 200000,
+        "anthropic.claude-v2:1": 200000,
+        "anthropic.claude-v2": 100000,
+        "anthropic.claude-instant-v1": 100000,
+        "meta.llama3-8b-instruct-v1:0": 8192,
+        "meta.llama3-70b-instruct-v1:0": 8192,
+        "meta.llama2-13b-chat-v1": 4096,
+        "meta.llama2-70b-chat-v1": 4096,
+        "mistral.mistral-7b-instruct-v0:2": 32768,
+        "mistral.mixtral-8x7b-instruct-v0:1": 32768,
+        "mistral.mistral-large-2402-v1:0": 32768,
+        "cohere.embed-english-v3": 512,
+        "cohere.embed-multilingual-v3": 512
     }
 }
diff --git a/scrapegraphai/models/__init__.py b/scrapegraphai/models/__init__.py
index b81e376f..19751e4c 100644
--- a/scrapegraphai/models/__init__.py
+++ b/scrapegraphai/models/__init__.py
@@ -10,3 +10,4 @@
 from .ollama import Ollama
 from .hugging_face import HuggingFace
 from .groq import Groq
+from .bedrock import Bedrock
diff --git a/scrapegraphai/models/bedrock.py b/scrapegraphai/models/bedrock.py
new file mode 100644
index 00000000..b7cbe288
--- /dev/null
+++ b/scrapegraphai/models/bedrock.py
@@ -0,0 +1,19 @@
+""" 
+Bedrock configuration wrapper.
+"""
+from langchain_aws import ChatBedrock
+
+
+class Bedrock(ChatBedrock):
+    """Thin ChatBedrock subclass that is constructed from a single config dict."""
+
+    def __init__(self, llm_config: dict):
+        """
+        Initialize the underlying ChatBedrock from a configuration dictionary.
+        No defaults are added here; the dictionary is forwarded unchanged.
+
+        Args:
+            llm_config (dict): Keyword arguments passed on to ChatBedrock.
+        """
+        # Unpack the dict so each key becomes a ChatBedrock keyword argument
+        super().__init__(**llm_config)
diff --git a/scrapegraphai/nodes/rag_node.py b/scrapegraphai/nodes/rag_node.py
index d10f50c6..64221743 100644
--- a/scrapegraphai/nodes/rag_node.py
+++ b/scrapegraphai/nodes/rag_node.py
@@ -6,12 +6,14 @@
 from langchain.docstore.document import Document
 from langchain.retrievers import ContextualCompressionRetriever
 from langchain.retrievers.document_compressors import EmbeddingsFilter, DocumentCompressorPipeline
+from langchain_aws.embeddings.bedrock import BedrockEmbeddings
 from langchain_community.document_transformers import EmbeddingsRedundantFilter
 from langchain_community.embeddings import HuggingFaceHubEmbeddings
 from langchain_community.vectorstores import FAISS
 from langchain_community.embeddings import OllamaEmbeddings
 from langchain_openai import OpenAIEmbeddings, AzureOpenAIEmbeddings
-from ..models import OpenAI, Ollama, AzureOpenAI, HuggingFace
+
+from ..models import OpenAI, Ollama, AzureOpenAI, HuggingFace, Bedrock
 from .base_node import BaseNode
 
 
@@ -42,7 +44,8 @@ def __init__(self, input: str, output: List[str], node_config: dict, node_name:
         super().__init__(node_name, "node", input, output, 2, node_config)
         self.llm_model = node_config["llm"]
         self.embedder_model = node_config.get("embedder_model", None)
-        self.verbose = True if node_config is None else node_config.get("verbose", False)
+        self.verbose = True if node_config is None else node_config.get(
+            "verbose", False)
 
     def execute(self, state):
         """
@@ -82,7 +85,7 @@ def execute(self, state):
                 },
             )
             chunked_docs.append(doc)
-        
+
         if self.verbose:
             print("--- (updated chunks metadata) ---")
 
@@ -104,6 +107,9 @@ def execute(self, state):
             embeddings = OllamaEmbeddings(**params)
         elif isinstance(embedding_model, HuggingFace):
             embeddings = HuggingFaceHubEmbeddings(model=embedding_model.model)
+        elif isinstance(embedding_model, Bedrock):
+            embeddings = BedrockEmbeddings(
+                client=None, model_id=embedding_model.model_id)
         else:
             raise ValueError("Embedding Model missing or not supported")
 

From 1afa31910d25b2735abe0ad09dad433d6c2159fb Mon Sep 17 00:00:00 2001
From: VinciGit00 <mvincig11@gmail.com>
Date: Thu, 2 May 2024 16:33:51 +0200
Subject: [PATCH 2/4] fix: add to requirements.txt langchain-aws = "^0.1.2"

---
 requirements.txt | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 02aadac4..30e98b3d 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -13,4 +13,5 @@ google==3.0.0
 minify-html==0.15.0
 free-proxy==1.1.1
 langchain-groq==0.1.3
-playwright==1.43.0
\ No newline at end of file
+playwright==1.43.0
+langchain-aws = "^0.1.2"

From db419058132456542e8cd9100918ede44b30041c Mon Sep 17 00:00:00 2001
From: VinciGit00 <mvincig11@gmail.com>
Date: Thu, 2 May 2024 17:57:04 +0200
Subject: [PATCH 3/4] Use valid pip syntax for langchain-aws in requirements.txt

---
 requirements.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/requirements.txt b/requirements.txt
index 30e98b3d..b7c642d1 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -14,4 +14,4 @@ minify-html==0.15.0
 free-proxy==1.1.1
 langchain-groq==0.1.3
 playwright==1.43.0
-langchain-aws = "^0.1.2"
+langchain-aws==0.1.2

From 75a4042a232a5b69fd38d1666fea9633b4fd015e Mon Sep 17 00:00:00 2001
From: semantic-release-bot <semantic-release-bot@martynus.net>
Date: Thu, 2 May 2024 15:58:40 +0000
Subject: [PATCH 4/4] ci(release): 0.6.1-beta.1 [skip ci]

## [0.6.1-beta.1](https://github.com/VinciGit00/Scrapegraph-ai/compare/v0.6.0...v0.6.1-beta.1) (2024-05-02)

### Bug Fixes

* add to requirements.txt langchain-aws = "^0.1.2" ([1afa319](https://github.com/VinciGit00/Scrapegraph-ai/commit/1afa31910d25b2735abe0ad09dad433d6c2159fb))
---
 CHANGELOG.md   | 7 +++++++
 pyproject.toml | 2 +-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index 87860fbb..dd613838 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,3 +1,10 @@
+## [0.6.1-beta.1](https://github.com/VinciGit00/Scrapegraph-ai/compare/v0.6.0...v0.6.1-beta.1) (2024-05-02)
+
+
+### Bug Fixes
+
+* add to requirements.txt langchain-aws = "^0.1.2" ([1afa319](https://github.com/VinciGit00/Scrapegraph-ai/commit/1afa31910d25b2735abe0ad09dad433d6c2159fb))
+
 ## [0.6.0](https://github.com/VinciGit00/Scrapegraph-ai/compare/v0.5.2...v0.6.0) (2024-05-02)
 
 
diff --git a/pyproject.toml b/pyproject.toml
index f3503dce..b6f39b23 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,7 +1,7 @@
 [tool.poetry]
 name = "scrapegraphai"
 
-version = "0.6.0"
+version = "0.6.1b1"
 
 
 description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines."