Skip to content

Commit

Permalink
Merge branch 'pre/beta' into main
Browse files Browse the repository at this point in the history
  • Loading branch information
VinciGit00 authored May 2, 2024
2 parents faa3498 + 75a4042 commit 96ebcfc
Show file tree
Hide file tree
Showing 8 changed files with 71 additions and 8 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
## [0.6.1](https://github.com/VinciGit00/Scrapegraph-ai/compare/v0.6.0...v0.6.1) (2024-05-02)



### Bug Fixes

* gemini error ([2ea54ea](https://github.com/VinciGit00/Scrapegraph-ai/commit/2ea54eab1d070e177c7d5ecfcc032b325fbd7c12))


## [0.6.0](https://github.com/VinciGit00/Scrapegraph-ai/compare/v0.5.2...v0.6.0) (2024-05-02)


Expand Down
3 changes: 2 additions & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@ name = "scrapegraphai"

version = "0.6.1"


description = "A web scraping library based on LangChain which uses LLM and direct graph logic to create scraping pipelines."
authors = [
"Marco Vinciguerra <[email protected]>",
Expand Down Expand Up @@ -41,6 +40,8 @@ minify-html = "0.15.0"
free-proxy = "1.1.1"
langchain-groq = "0.1.3"
playwright = "^1.43.0"
langchain-aws = "^0.1.2"


[tool.poetry.dev-dependencies]
pytest = "8.0.0"
Expand Down
3 changes: 2 additions & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -13,4 +13,5 @@ google==3.0.0
minify-html==0.15.0
free-proxy==1.1.1
langchain-groq==0.1.3
playwright==1.43.0
playwright==1.43.0
langchain-aws==0.1.2
22 changes: 19 additions & 3 deletions scrapegraphai/graphs/abstract_graph.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,8 @@

from abc import ABC, abstractmethod
from typing import Optional
from ..models import OpenAI, Gemini, Ollama, AzureOpenAI, HuggingFace, Groq

from ..models import OpenAI, Gemini, Ollama, AzureOpenAI, HuggingFace, Groq, Bedrock
from ..helpers import models_tokens


Expand Down Expand Up @@ -47,7 +48,8 @@ def __init__(self, prompt: str, config: dict, source: Optional[str] = None):

# Set common configuration parameters
self.verbose = True if config is None else config.get("verbose", False)
self.headless = True if config is None else config.get("headless", True)
self.headless = True if config is None else config.get(
"headless", True)

# Create the graph
self.graph = self._create_graph()
Expand Down Expand Up @@ -140,12 +142,26 @@ def _create_llm(self, llm_config: dict, chat=False) -> object:
return HuggingFace(llm_params)
elif "groq" in llm_params["model"]:
llm_params["model"] = llm_params["model"].split("/")[-1]

try:
self.model_token = models_tokens["groq"][llm_params["model"]]
except KeyError:
raise KeyError("Model not supported")
return Groq(llm_params)
elif "bedrock" in llm_params["model"]:
llm_params["model"] = llm_params["model"].split("/")[-1]
model_id = llm_params["model"]

try:
self.model_token = models_tokens["bedrock"][llm_params["model"]]
except KeyError:
raise KeyError("Model not supported")
return Bedrock({
"model_id": model_id,
"model_kwargs": {
"temperature": llm_params["temperature"],
}
})
else:
raise ValueError(
"Model provided by the configuration not supported")
Expand Down
17 changes: 17 additions & 0 deletions scrapegraphai/helpers/models_tokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -48,5 +48,22 @@
"claude2": 9000,
"claude2.1": 200000,
"claude3": 200000
},
"bedrock": {
"anthropic.claude-3-haiku-20240307-v1:0": 200000,
"anthropic.claude-3-sonnet-20240229-v1:0": 200000,
"anthropic.claude-3-opus-20240229-v1:0": 200000,
"anthropic.claude-v2:1": 200000,
"anthropic.claude-v2": 100000,
"anthropic.claude-instant-v1": 100000,
"meta.llama3-8b-instruct-v1:0": 8192,
"meta.llama3-70b-instruct-v1:0": 8192,
"meta.llama2-13b-chat-v1": 4096,
"meta.llama2-70b-chat-v1": 4096,
"mistral.mistral-7b-instruct-v0:2": 32768,
"mistral.mixtral-8x7b-instruct-v0:1": 32768,
"mistral.mistral-large-2402-v1:0": 32768,
"cohere.embed-english-v3": 512,
"cohere.embed-multilingual-v3": 512
}
}
1 change: 1 addition & 0 deletions scrapegraphai/models/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,4 @@
from .ollama import Ollama
from .hugging_face import HuggingFace
from .groq import Groq
from .bedrock import Bedrock
19 changes: 19 additions & 0 deletions scrapegraphai/models/bedrock.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
"""
bedrock configuration wrapper
"""
from langchain_aws import ChatBedrock


class Bedrock(ChatBedrock):
    """Thin project-level subclass of langchain_aws' ``ChatBedrock``."""

    def __init__(self, llm_config: dict):
        """
        Build the underlying ChatBedrock instance from a configuration dict.

        Args:
            llm_config (dict): Settings for the language model. Every
                key/value pair is passed to the ``ChatBedrock`` constructor
                as a keyword argument.
        """
        # The dict is unpacked as-is; this wrapper injects no defaults of
        # its own, so callers must supply everything ChatBedrock needs.
        super().__init__(**llm_config)
12 changes: 9 additions & 3 deletions scrapegraphai/nodes/rag_node.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,14 @@
from langchain.docstore.document import Document
from langchain.retrievers import ContextualCompressionRetriever
from langchain.retrievers.document_compressors import EmbeddingsFilter, DocumentCompressorPipeline
from langchain_aws.embeddings.bedrock import BedrockEmbeddings
from langchain_community.document_transformers import EmbeddingsRedundantFilter
from langchain_community.embeddings import HuggingFaceHubEmbeddings
from langchain_community.vectorstores import FAISS
from langchain_community.embeddings import OllamaEmbeddings
from langchain_openai import OpenAIEmbeddings, AzureOpenAIEmbeddings
from ..models import OpenAI, Ollama, AzureOpenAI, HuggingFace

from ..models import OpenAI, Ollama, AzureOpenAI, HuggingFace, Bedrock
from .base_node import BaseNode


Expand Down Expand Up @@ -39,7 +41,8 @@ def __init__(self, input: str, output: List[str], node_config: dict, node_name:

self.llm_model = node_config["llm"]
self.embedder_model = node_config.get("embedder_model", None)
self.verbose = True if node_config is None else node_config.get("verbose", False)
self.verbose = True if node_config is None else node_config.get(
"verbose", False)

def execute(self, state: dict) -> dict:
"""
Expand Down Expand Up @@ -80,7 +83,7 @@ def execute(self, state: dict) -> dict:
},
)
chunked_docs.append(doc)

if self.verbose:
print("--- (updated chunks metadata) ---")

Expand All @@ -104,6 +107,9 @@ def execute(self, state: dict) -> dict:
embeddings = OllamaEmbeddings(**params)
elif isinstance(embedding_model, HuggingFace):
embeddings = HuggingFaceHubEmbeddings(model=embedding_model.model)
elif isinstance(embedding_model, Bedrock):
embeddings = BedrockEmbeddings(
client=None, model_id=embedding_model.model_id)
else:
raise ValueError("Embedding Model missing or not supported")

Expand Down

0 comments on commit 96ebcfc

Please sign in to comment.