From dc2257053d4e61bd0ca940d7327293ca00459d21 Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Tue, 10 Sep 2024 14:28:59 -0700 Subject: [PATCH 1/7] remove numpy version lock --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 21c1898b873..5ea5e43f38a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ dependencies = [ 'chroma-hnswlib==0.7.6', 'fastapi >= 0.95.2', 'uvicorn[standard] >= 0.18.3', - 'numpy >= 1.22.5, < 2.0.0', + 'numpy >= 1.22.5', 'posthog >= 2.4.0', 'typing_extensions >= 4.5.0', 'onnxruntime >= 1.14.1', From b736380d09236b05c3d25839205809b41bb2d074 Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Wed, 11 Sep 2024 10:21:45 -0700 Subject: [PATCH 2/7] add support for numpy2.0 --- chromadb/api/types.py | 2 +- chromadb/test/ef/test_multimodal_ef.py | 10 +++++----- pyproject.toml | 4 ++-- requirements.txt | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/chromadb/api/types.py b/chromadb/api/types.py index f0ffc1e6ca0..56c55a34355 100644 --- a/chromadb/api/types.py +++ b/chromadb/api/types.py @@ -103,7 +103,7 @@ def maybe_cast_one_to_many_document(target: OneOrMany[Document]) -> Documents: # Images -ImageDType = Union[np.uint, np.int_, np.float_] # type: ignore[name-defined] +ImageDType = Union[np.uint, np.int_, np.float64] Image = NDArray[ImageDType] Images = List[Image] diff --git a/chromadb/test/ef/test_multimodal_ef.py b/chromadb/test/ef/test_multimodal_ef.py index 82f66fea33e..d96cd5eca22 100644 --- a/chromadb/test/ef/test_multimodal_ef.py +++ b/chromadb/test/ef/test_multimodal_ef.py @@ -17,7 +17,7 @@ # then hashes them to a fixed dimension. class hashing_multimodal_ef(EmbeddingFunction[Embeddable]): def __init__(self) -> None: - self._hef = hashing_embedding_function(dim=10, dtype=np.float_) + self._hef = hashing_embedding_function(dim=10, dtype=np.float64) def __call__(self, input: Embeddable) -> Embeddings: to_texts = [str(i) for i in input] @@ -82,7 +82,7 @@ def test_multimodal( # get() should return all the documents and images # ids corresponding to images should not have documents - get_result = multimodal_collection.get(include=["documents"]) + get_result = multimodal_collection.get(include=["documents"]) # type: ignore[list-item] assert len(get_result["ids"]) == len(document_ids) + len(image_ids) for i, id in enumerate(get_result["ids"]): assert id in document_ids or id in image_ids @@ -124,14 +124,14 @@ def test_multimodal( # Query with images query_result = multimodal_collection.query( - query_images=[query_image], n_results=n_query_results, include=["documents"] + query_images=[query_image], n_results=n_query_results, include=["documents"] # type: ignore[list-item] ) assert query_result["ids"][0] == nearest_image_neighbor_ids # Query with documents query_result = multimodal_collection.query( - query_texts=[query_document], n_results=n_query_results, include=["documents"] + query_texts=[query_document], n_results=n_query_results, include=["documents"] # type: ignore[list-item] ) assert query_result["ids"][0] == nearest_document_neighbor_ids @@ -152,6 +152,6 @@ def test_multimodal_update_with_image( multimodal_collection.update(ids=id, images=image) - get_result = multimodal_collection.get(ids=id, include=["documents"]) + get_result = multimodal_collection.get(ids=id, include=["documents"]) # type: ignore[list-item] assert get_result["documents"] is not None assert get_result["documents"][0] is None diff --git a/pyproject.toml b/pyproject.toml index 5ea5e43f38a..25d01261fe3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,10 +20,10 @@ dependencies = [ 'chroma-hnswlib==0.7.6', 'fastapi >= 0.95.2', 'uvicorn[standard] >= 0.18.3', - 'numpy >= 1.22.5', + 'numpy >= 2.0.0', 'posthog >= 2.4.0', 'typing_extensions >= 4.5.0', - 'onnxruntime >= 1.14.1', + 'onnxruntime >= 1.19.0', 'opentelemetry-api>=1.2.0', 'opentelemetry-exporter-otlp-proto-grpc>=1.2.0', 'opentelemetry-instrumentation-fastapi>=0.41b0', diff --git a/requirements.txt b/requirements.txt index dfd08dccd8d..2cc4925adaa 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,8 +7,8 @@ httpx>=0.27.0 importlib-resources kubernetes>=28.1.0 mmh3>=4.0.1 -numpy>=1.22.5, <2.0.0 -onnxruntime>=1.14.1 +numpy>=2.0.0 +onnxruntime>=1.19.0 opentelemetry-api>=1.2.0 opentelemetry-exporter-otlp-proto-grpc>=1.24.0 opentelemetry-instrumentation-fastapi>=0.41b0 From 6f189657a8f8cf7be562b4ec5bbc249be8d8b9f1 Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Wed, 11 Sep 2024 12:54:15 -0700 Subject: [PATCH 3/7] add backward compatibility --- chromadb/api/types.py | 10 +++++++-- chromadb/test/property/strategies.py | 25 +++++++++++++++-------- chromadb/test/property/test_embeddings.py | 11 +++++++--- clients/python/pyproject.toml | 2 +- clients/python/requirements.txt | 2 +- pyproject.toml | 4 ++-- requirements.txt | 4 ++-- 7 files changed, 39 insertions(+), 19 deletions(-) diff --git a/chromadb/api/types.py b/chromadb/api/types.py index 56c55a34355..f676a9c8a6d 100644 --- a/chromadb/api/types.py +++ b/chromadb/api/types.py @@ -1,5 +1,6 @@ from typing import Optional, Union, TypeVar, List, Dict, Any, Tuple, cast from numpy.typing import NDArray +from packaging import version import numpy as np from typing_extensions import TypedDict, Protocol, runtime_checkable from enum import Enum @@ -103,8 +104,13 @@ def maybe_cast_one_to_many_document(target: OneOrMany[Document]) -> Documents: # Images -ImageDType = Union[np.uint, np.int_, np.float64] -Image = NDArray[ImageDType] +ImageDType = None +if version.parse(np.__version__) < version.parse("2.0.0"): + ImageDType = Union[np.uint, np.int_, np.float_] # type: ignore[attr-defined] +else: + ImageDType = Union[np.uint, np.int_, np.float64] + +Image = NDArray[ImageDType] # type: ignore[valid-type] Images = List[Image] diff --git a/chromadb/test/property/strategies.py b/chromadb/test/property/strategies.py index 28f20b940c7..ceef46057ff 100644 --- a/chromadb/test/property/strategies.py +++ b/chromadb/test/property/strategies.py @@ -5,6 +5,7 @@ from typing_extensions import TypedDict import uuid import numpy as np +from packaging import version import numpy.typing as npt import chromadb.api.types as types import re @@ -148,7 +149,12 @@ def one_or_both( "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_./+" ) -float_types = [np.float16, np.float32, np.float64] +float_types = None +if version.parse(np.__version__) < version.parse("2.0.0"): + float_types = [np.float16, np.float32, np.float_] # type: ignore[attr-defined] +else: + float_types = [np.float16, np.float32, np.float64] + int_types = [np.int16, np.int32, np.int64] # TODO: handle int types @@ -194,7 +200,7 @@ def create_embeddings_ndarray( dim: int, count: int, dtype: npt.DTypeLike, -) -> np.typing.NDArray[Any]: +) -> npt.NDArray[Any]: return np.random.uniform( low=-1.0, high=1.0, @@ -295,7 +301,7 @@ def collections( name = draw(collection_name()) metadata = draw(collection_metadata) dimension = draw(st.integers(min_value=2, max_value=2048)) - dtype = draw(st.sampled_from(float_types)) + dtype = draw(st.sampled_from(float_types)) # type: ignore[arg-type] use_persistent_hnsw_params = draw(with_persistent_hnsw_params) @@ -376,7 +382,10 @@ def collections( @st.composite def metadata( - draw: st.DrawFn, collection: Collection, min_size=0, max_size=None + draw: st.DrawFn, + collection: Collection, + min_size: int = 0, + max_size: Optional[int] = None, ) -> Optional[types.Metadata]: """Strategy for generating metadata that could be a part of the given collection""" # First draw a random dictionary. @@ -429,7 +438,7 @@ def document(draw: st.DrawFn, collection: Collection) -> types.Document: # Blacklist certain unicode characters that affect sqlite processing. # For example, the null (/x00) character makes sqlite stop processing a string. - blacklist_categories = ("Cc", "Cs") + blacklist_categories = ("Cc", "Cs") # type: ignore[assignment] if collection.known_document_keywords: known_words_st = st.sampled_from(collection.known_document_keywords) else: @@ -553,7 +562,7 @@ def where_clause(draw: st.DrawFn, collection: Collection) -> types.Where: if not NOT_CLUSTER_ONLY: legal_ops: List[Optional[str]] = [None, "$eq"] else: - legal_ops: List[Optional[str]] = [None, "$eq", "$ne", "$in", "$nin"] + legal_ops: List[Optional[str]] = [None, "$eq", "$ne", "$in", "$nin"] # type: ignore[no-redef] if not isinstance(value, str) and not isinstance(value, bool): legal_ops.extend(["$gt", "$lt", "$lte", "$gte"]) @@ -605,10 +614,10 @@ def where_doc_clause(draw: st.DrawFn, collection: Collection) -> types.WhereDocu else: op = draw(st.sampled_from(["$contains", "$not_contains"])) - if op == "$contains": + if op == "$contains": # type: ignore[comparison-overlap] return {"$contains": word} else: - assert op == "$not_contains" + assert op == "$not_contains" # type: ignore[comparison-overlap] return {"$not_contains": word} diff --git a/chromadb/test/property/test_embeddings.py b/chromadb/test/property/test_embeddings.py index dc53bbc52d7..84fb99852e5 100644 --- a/chromadb/test/property/test_embeddings.py +++ b/chromadb/test/property/test_embeddings.py @@ -4,6 +4,7 @@ import pytest import logging import hypothesis +from packaging import version import hypothesis.strategies as st from hypothesis import given, settings, HealthCheck from typing import Dict, Set, cast, Union, DefaultDict, Any, List @@ -51,9 +52,13 @@ def print_traces() -> None: print(f"{key}: {value}") -dtype_shared_st: st.SearchStrategy[ - Union[np.float16, np.float32, np.float64] -] = st.shared(st.sampled_from(strategies.float_types), key="dtype") +SearchStrategyType = None +if version.parse(np.__version__) < version.parse("2.0.0"): + SearchStrategyType = Union[np.float16, np.float32, np.float_] # type: ignore[attr-defined] +else: + SearchStrategyType = Union[np.float16, np.float32, np.float64] + +dtype_shared_st: SearchStrategyType = st.shared(st.sampled_from(strategies.float_types), key="dtype") # type: ignore[valid-type, arg-type] dimension_shared_st: st.SearchStrategy[int] = st.shared( st.integers(min_value=2, max_value=2048), key="dimension" diff --git a/clients/python/pyproject.toml b/clients/python/pyproject.toml index c7e1486d0c1..f9dfb51d9af 100644 --- a/clients/python/pyproject.toml +++ b/clients/python/pyproject.toml @@ -15,7 +15,7 @@ classifiers = [ "Operating System :: OS Independent", ] dependencies = [ - 'numpy >= 1.22.5, < 2.0.0', + 'numpy >= 1.22.5', 'opentelemetry-api>=1.2.0', 'opentelemetry-exporter-otlp-proto-grpc>=1.2.0', 'opentelemetry-sdk>=1.2.0', diff --git a/clients/python/requirements.txt b/clients/python/requirements.txt index 0c83eb72520..88afd9cda8a 100644 --- a/clients/python/requirements.txt +++ b/clients/python/requirements.txt @@ -1,5 +1,5 @@ httpx>=0.27.0 -numpy >= 1.22.5, < 2.0.0 +numpy >= 1.22.5 opentelemetry-api>=1.2.0 opentelemetry-exporter-otlp-proto-grpc>=1.2.0 opentelemetry-sdk>=1.2.0 diff --git a/pyproject.toml b/pyproject.toml index 25d01261fe3..5ea5e43f38a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,10 +20,10 @@ dependencies = [ 'chroma-hnswlib==0.7.6', 'fastapi >= 0.95.2', 'uvicorn[standard] >= 0.18.3', - 'numpy >= 2.0.0', + 'numpy >= 1.22.5', 'posthog >= 2.4.0', 'typing_extensions >= 4.5.0', - 'onnxruntime >= 1.19.0', + 'onnxruntime >= 1.14.1', 'opentelemetry-api>=1.2.0', 'opentelemetry-exporter-otlp-proto-grpc>=1.2.0', 'opentelemetry-instrumentation-fastapi>=0.41b0', diff --git a/requirements.txt b/requirements.txt index 2cc4925adaa..b7b621faf2a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,8 +7,8 @@ httpx>=0.27.0 importlib-resources kubernetes>=28.1.0 mmh3>=4.0.1 -numpy>=2.0.0 -onnxruntime>=1.19.0 +numpy>=1.22.5 +onnxruntime>=1.14.1 opentelemetry-api>=1.2.0 opentelemetry-exporter-otlp-proto-grpc>=1.24.0 opentelemetry-instrumentation-fastapi>=0.41b0 From 8552d0e36fdb7ddc869884370842ec6ca29f0e6a Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Tue, 10 Sep 2024 14:28:59 -0700 Subject: [PATCH 4/7] remove numpy version lock --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 21c1898b873..5ea5e43f38a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,7 +20,7 @@ dependencies = [ 'chroma-hnswlib==0.7.6', 'fastapi >= 0.95.2', 'uvicorn[standard] >= 0.18.3', - 'numpy >= 1.22.5, < 2.0.0', + 'numpy >= 1.22.5', 'posthog >= 2.4.0', 'typing_extensions >= 4.5.0', 'onnxruntime >= 1.14.1', From bc99fdd93016efa2d607333a6694791f129919c3 Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Wed, 11 Sep 2024 10:21:45 -0700 Subject: [PATCH 5/7] add support for numpy2.0 --- chromadb/api/types.py | 2 +- chromadb/test/ef/test_multimodal_ef.py | 10 +++++----- pyproject.toml | 4 ++-- requirements.txt | 4 ++-- 4 files changed, 10 insertions(+), 10 deletions(-) diff --git a/chromadb/api/types.py b/chromadb/api/types.py index f0ffc1e6ca0..56c55a34355 100644 --- a/chromadb/api/types.py +++ b/chromadb/api/types.py @@ -103,7 +103,7 @@ def maybe_cast_one_to_many_document(target: OneOrMany[Document]) -> Documents: # Images -ImageDType = Union[np.uint, np.int_, np.float_] # type: ignore[name-defined] +ImageDType = Union[np.uint, np.int_, np.float64] Image = NDArray[ImageDType] Images = List[Image] diff --git a/chromadb/test/ef/test_multimodal_ef.py b/chromadb/test/ef/test_multimodal_ef.py index 82f66fea33e..d96cd5eca22 100644 --- a/chromadb/test/ef/test_multimodal_ef.py +++ b/chromadb/test/ef/test_multimodal_ef.py @@ -17,7 +17,7 @@ # then hashes them to a fixed dimension. class hashing_multimodal_ef(EmbeddingFunction[Embeddable]): def __init__(self) -> None: - self._hef = hashing_embedding_function(dim=10, dtype=np.float_) + self._hef = hashing_embedding_function(dim=10, dtype=np.float64) def __call__(self, input: Embeddable) -> Embeddings: to_texts = [str(i) for i in input] @@ -82,7 +82,7 @@ def test_multimodal( # get() should return all the documents and images # ids corresponding to images should not have documents - get_result = multimodal_collection.get(include=["documents"]) + get_result = multimodal_collection.get(include=["documents"]) # type: ignore[list-item] assert len(get_result["ids"]) == len(document_ids) + len(image_ids) for i, id in enumerate(get_result["ids"]): assert id in document_ids or id in image_ids @@ -124,14 +124,14 @@ def test_multimodal( # Query with images query_result = multimodal_collection.query( - query_images=[query_image], n_results=n_query_results, include=["documents"] + query_images=[query_image], n_results=n_query_results, include=["documents"] # type: ignore[list-item] ) assert query_result["ids"][0] == nearest_image_neighbor_ids # Query with documents query_result = multimodal_collection.query( - query_texts=[query_document], n_results=n_query_results, include=["documents"] + query_texts=[query_document], n_results=n_query_results, include=["documents"] # type: ignore[list-item] ) assert query_result["ids"][0] == nearest_document_neighbor_ids @@ -152,6 +152,6 @@ def test_multimodal_update_with_image( multimodal_collection.update(ids=id, images=image) - get_result = multimodal_collection.get(ids=id, include=["documents"]) + get_result = multimodal_collection.get(ids=id, include=["documents"]) # type: ignore[list-item] assert get_result["documents"] is not None assert get_result["documents"][0] is None diff --git a/pyproject.toml b/pyproject.toml index 5ea5e43f38a..25d01261fe3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -20,10 +20,10 @@ dependencies = [ 'chroma-hnswlib==0.7.6', 'fastapi >= 0.95.2', 'uvicorn[standard] >= 0.18.3', - 'numpy >= 1.22.5', + 'numpy >= 2.0.0', 'posthog >= 2.4.0', 'typing_extensions >= 4.5.0', - 'onnxruntime >= 1.14.1', + 'onnxruntime >= 1.19.0', 'opentelemetry-api>=1.2.0', 'opentelemetry-exporter-otlp-proto-grpc>=1.2.0', 'opentelemetry-instrumentation-fastapi>=0.41b0', diff --git a/requirements.txt b/requirements.txt index dfd08dccd8d..2cc4925adaa 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,8 +7,8 @@ httpx>=0.27.0 importlib-resources kubernetes>=28.1.0 mmh3>=4.0.1 -numpy>=1.22.5, <2.0.0 -onnxruntime>=1.14.1 +numpy>=2.0.0 +onnxruntime>=1.19.0 opentelemetry-api>=1.2.0 opentelemetry-exporter-otlp-proto-grpc>=1.24.0 opentelemetry-instrumentation-fastapi>=0.41b0 From b7e0c085399d283567b7d8571d72572a41239b56 Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Wed, 11 Sep 2024 12:58:25 -0700 Subject: [PATCH 6/7] fix version --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 3279005cd59..5ea5e43f38a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,7 +23,7 @@ dependencies = [ 'numpy >= 1.22.5', 'posthog >= 2.4.0', 'typing_extensions >= 4.5.0', - 'onnxruntime >= 1.19.0', + 'onnxruntime >= 1.14.1', 'opentelemetry-api>=1.2.0', 'opentelemetry-exporter-otlp-proto-grpc>=1.2.0', 'opentelemetry-instrumentation-fastapi>=0.41b0', From 74a8394163e7e3797e6c96266fb8970a7d65f82f Mon Sep 17 00:00:00 2001 From: Spike Lu Date: Thu, 12 Sep 2024 11:55:06 -0700 Subject: [PATCH 7/7] handle backward compatability --- chromadb/test/ef/test_multimodal_ef.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/chromadb/test/ef/test_multimodal_ef.py b/chromadb/test/ef/test_multimodal_ef.py index d96cd5eca22..d24e07907b4 100644 --- a/chromadb/test/ef/test_multimodal_ef.py +++ b/chromadb/test/ef/test_multimodal_ef.py @@ -1,6 +1,7 @@ from typing import Generator, cast import numpy as np import pytest +from packaging import version import chromadb from chromadb.api.types import ( Embeddable, @@ -17,7 +18,10 @@ # then hashes them to a fixed dimension. class hashing_multimodal_ef(EmbeddingFunction[Embeddable]): def __init__(self) -> None: - self._hef = hashing_embedding_function(dim=10, dtype=np.float64) + if version.parse(np.__version__) < version.parse("2.0.0"): + self._hef = hashing_embedding_function(dim=10, dtype=np.float_) # type: ignore[attr-defined] + else: + self._hef = hashing_embedding_function(dim=10, dtype=np.float64) def __call__(self, input: Embeddable) -> Embeddings: to_texts = [str(i) for i in input]