Skip to content

Commit

Permalink
fix: add new embedding models
Browse files Browse the repository at this point in the history
  • Loading branch information
VinciGit00 committed Jun 18, 2024
1 parent 41964c5 commit 1d0cbbc
Show file tree
Hide file tree
Showing 2 changed files with 37 additions and 35 deletions.
67 changes: 33 additions & 34 deletions requirements-dev.lock
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@ altair==5.3.0
# via streamlit
annotated-types==0.7.0
# via pydantic
anthropic==0.26.1
anthropic==0.28.1
# via langchain-anthropic
anyio==4.3.0
anyio==4.4.0
# via anthropic
# via groq
# via httpx
Expand All @@ -42,17 +42,17 @@ beautifulsoup4==4.12.3
# via scrapegraphai
blinker==1.8.2
# via streamlit
boto3==1.34.113
boto3==1.34.127
# via langchain-aws
botocore==1.34.113
botocore==1.34.127
# via boto3
# via s3transfer
burr==0.22.1
# via scrapegraphai
cachetools==5.3.3
# via google-auth
# via streamlit
certifi==2024.2.2
certifi==2024.6.2
# via httpcore
# via httpx
# via requests
Expand All @@ -67,7 +67,7 @@ contourpy==1.2.1
# via matplotlib
cycler==0.12.1
# via matplotlib
dataclasses-json==0.6.6
dataclasses-json==0.6.7
# via langchain
# via langchain-community
defusedxml==0.7.1
Expand All @@ -80,27 +80,26 @@ dnspython==2.6.1
# via email-validator
docutils==0.19
# via sphinx
email-validator==2.1.1
email-validator==2.1.2
# via fastapi
faiss-cpu==1.8.0
# via scrapegraphai
fastapi==0.111.0
# via burr
# via fastapi-pagination
fastapi-cli==0.0.4
# via fastapi
fastapi-pagination==0.12.24
fastapi-pagination==0.12.25
# via burr
filelock==3.14.0
filelock==3.15.1
# via huggingface-hub
fonttools==4.52.1
fonttools==4.53.0
# via matplotlib
free-proxy==1.1.1
# via scrapegraphai
frozenlist==1.4.1
# via aiohttp
# via aiosignal
fsspec==2024.5.0
fsspec==2024.6.0
# via huggingface-hub
furo==2024.5.6
# via scrapegraphai
Expand All @@ -116,9 +115,9 @@ google-api-core==2.19.0
# via google-ai-generativelanguage
# via google-api-python-client
# via google-generativeai
google-api-python-client==2.130.0
google-api-python-client==2.133.0
# via google-generativeai
google-auth==2.29.0
google-auth==2.30.0
# via google-ai-generativelanguage
# via google-api-core
# via google-api-python-client
Expand All @@ -128,17 +127,17 @@ google-auth-httplib2==0.2.0
# via google-api-python-client
google-generativeai==0.5.4
# via langchain-google-genai
googleapis-common-protos==1.63.0
googleapis-common-protos==1.63.1
# via google-api-core
# via grpcio-status
graphviz==0.20.3
# via burr
# via scrapegraphai
greenlet==3.0.3
# via playwright
groq==0.8.0
groq==0.9.0
# via langchain-groq
grpcio==1.64.0
grpcio==1.64.1
# via google-api-core
# via grpcio-status
grpcio-status==1.62.2
Expand All @@ -160,7 +159,7 @@ httpx==0.27.0
# via fastapi
# via groq
# via openai
huggingface-hub==0.23.1
huggingface-hub==0.23.4
# via tokenizers
idna==3.7
# via anyio
Expand All @@ -178,15 +177,15 @@ jinja2==3.1.4
# via fastapi
# via pydeck
# via sphinx
jiter==0.4.0
jiter==0.4.2
# via anthropic
jmespath==1.0.1
# via boto3
# via botocore
jsonpatch==1.33
# via langchain
# via langchain-core
jsonpointer==2.4
jsonpointer==3.0.0
# via jsonpatch
jsonschema==4.22.0
# via altair
Expand Down Expand Up @@ -219,7 +218,7 @@ langchain-openai==0.1.6
# via scrapegraphai
langchain-text-splitters==0.0.2
# via langchain
langsmith==0.1.63
langsmith==0.1.77
# via langchain
# via langchain-community
# via langchain-core
Expand All @@ -231,7 +230,7 @@ markdown-it-py==3.0.0
# via rich
markupsafe==2.1.5
# via jinja2
marshmallow==3.21.2
marshmallow==3.21.3
# via dataclasses-json
matplotlib==3.9.0
# via burr
Expand All @@ -257,10 +256,10 @@ numpy==1.26.4
# via pydeck
# via sf-hamilton
# via streamlit
openai==1.30.3
openai==1.34.0
# via burr
# via langchain-openai
orjson==3.10.3
orjson==3.10.5
# via fastapi
# via langsmith
packaging==23.2
Expand Down Expand Up @@ -303,7 +302,7 @@ pyasn1==0.6.0
# via rsa
pyasn1-modules==0.4.0
# via google-auth
pydantic==2.7.1
pydantic==2.7.4
# via anthropic
# via burr
# via fastapi
Expand All @@ -314,7 +313,7 @@ pydantic==2.7.1
# via langchain-core
# via langsmith
# via openai
pydantic-core==2.18.2
pydantic-core==2.18.4
# via pydantic
pydeck==0.9.1
# via streamlit
Expand Down Expand Up @@ -352,7 +351,7 @@ referencing==0.35.1
# via jsonschema-specifications
regex==2024.5.15
# via tiktoken
requests==2.32.2
requests==2.32.3
# via burr
# via free-proxy
# via google-api-core
Expand All @@ -375,7 +374,7 @@ s3transfer==0.10.1
# via boto3
semchunk==1.0.1
# via scrapegraphai
sf-hamilton==1.63.0
sf-hamilton==1.66.1
# via burr
shellingham==1.5.4
# via typer
Expand Down Expand Up @@ -418,7 +417,7 @@ starlette==0.37.2
# via fastapi
streamlit==1.35.0
# via burr
tenacity==8.3.0
tenacity==8.4.1
# via langchain
# via langchain-community
# via langchain-core
Expand All @@ -432,7 +431,7 @@ toml==0.10.2
# via streamlit
toolz==0.12.1
# via altair
tornado==6.4
tornado==6.4.1
# via streamlit
tqdm==4.66.4
# via google-generativeai
Expand All @@ -442,7 +441,7 @@ tqdm==4.66.4
# via semchunk
typer==0.12.3
# via fastapi-cli
typing-extensions==4.12.0
typing-extensions==4.12.2
# via anthropic
# via fastapi
# via fastapi-pagination
Expand All @@ -469,15 +468,15 @@ undetected-playwright==0.3.0
# via scrapegraphai
uritemplate==4.1.1
# via google-api-python-client
urllib3==1.26.18
urllib3==2.2.2
# via botocore
# via requests
uvicorn==0.29.0
uvicorn==0.30.1
# via burr
# via fastapi
uvloop==0.19.0
# via uvicorn
watchfiles==0.21.0
watchfiles==0.22.0
# via uvicorn
websockets==12.0
# via uvicorn
Expand Down
5 changes: 4 additions & 1 deletion scrapegraphai/helpers/models_tokens.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,10 @@
"stablelm-zephyr": 8192,
"wizardlm2:8x22b": 65536,
# embedding models
"shaw/dmeta-embedding-zh": 8192,
"shaw/dmeta-embedding-zh-small-q4": 8192,
"shaw/dmeta-embedding-zh-q4": 8192,
"chevalblanc/acge_text_embedding": 8192,
"martcreation/dmeta-embedding-zh": 8192,
"snowflake-arctic-embed": 8192,
"mxbai-embed-large": 512
},
Expand Down

0 comments on commit 1d0cbbc

Please sign in to comment.