From b7cbc61c10cefad45322e057e57a89e4e1602462 Mon Sep 17 00:00:00 2001
From: Ben Eggers <ben@beneggers.com>
Date: Fri, 23 Feb 2024 14:58:37 -0800
Subject: [PATCH] Fix test flakiness

---
 chromadb/test/property/strategies.py       |  17 ++-
 chromadb/test/property/test_collections.py | 132 +++++++++++++++++----
 2 files changed, 122 insertions(+), 27 deletions(-)

diff --git a/chromadb/test/property/strategies.py b/chromadb/test/property/strategies.py
index 89def8ac316..785d2775723 100644
--- a/chromadb/test/property/strategies.py
+++ b/chromadb/test/property/strategies.py
@@ -236,10 +236,21 @@ def embedding_function_strategy(
 
 
 @dataclass
-class Collection:
+class ExternalCollection:
+    """
+    An external view of a collection.
+    """
     name: str
-    id: uuid.UUID
     metadata: Optional[types.Metadata]
+    embedding_function: Optional[types.EmbeddingFunction[Embeddable]]
+
+
+@dataclass
+class Collection(ExternalCollection):
+    """
+    An internal view of a collection.
+    """
+    id: uuid.UUID
     dimension: int
     dtype: npt.DTypeLike
     topic: str
@@ -247,7 +258,7 @@ class Collection:
     known_document_keywords: List[str]
     has_documents: bool = False
     has_embeddings: bool = False
-    embedding_function: Optional[types.EmbeddingFunction[Embeddable]] = None
+
 
 @st.composite
 def collections(
diff --git a/chromadb/test/property/test_collections.py b/chromadb/test/property/test_collections.py
index 251dfa74f38..18ef6f609fe 100644
--- a/chromadb/test/property/test_collections.py
+++ b/chromadb/test/property/test_collections.py
@@ -14,11 +14,15 @@
     run_state_machine_as_test,
     MultipleResults,
 )
-from typing import Dict, Optional, Any, Mapping
+from typing import Dict, Optional
+
+import numpy
+import uuid
+from chromadb.test.property.strategies import hashing_embedding_function
 
 
 class CollectionStateMachine(RuleBasedStateMachine):
-    collections: Bundle[strategies.Collection]
+    collections: Bundle[strategies.ExternalCollection]
     _model: Dict[str, Optional[types.CollectionMetadata]]
 
     collections = Bundle("collections")
@@ -35,8 +39,8 @@ def initialize(self) -> None:
 
     @rule(target=collections, coll=strategies.collections())
     def create_coll(
-        self, coll: strategies.Collection
-    ) -> MultipleResults[strategies.Collection]:
+        self, coll: strategies.ExternalCollection
+    ) -> MultipleResults[strategies.ExternalCollection]:
         # Metadata can either be None or a non-empty dict
         if coll.name in self.model or (
             coll.metadata is not None and len(coll.metadata) == 0
@@ -54,14 +58,14 @@ def create_coll(
             metadata=coll.metadata,
             embedding_function=coll.embedding_function,
         )
-        self.set_model(coll.name, coll.metadata, str(coll.id))
+        self.set_model(coll.name, coll.metadata)
 
         assert c.name == coll.name
         assert c.metadata == self.model[coll.name]
         return multiple(coll)
 
     @rule(coll=collections)
-    def get_coll(self, coll: strategies.Collection) -> None:
+    def get_coll(self, coll: strategies.ExternalCollection) -> None:
         if coll.name in self.model:
             c = self.api.get_collection(name=coll.name)
             assert c.name == coll.name
@@ -71,7 +75,7 @@ def get_coll(self, coll: strategies.Collection) -> None:
                 self.api.get_collection(name=coll.name)
 
     @rule(coll=consumes(collections))
-    def delete_coll(self, coll: strategies.Collection) -> None:
+    def delete_coll(self, coll: strategies.ExternalCollection) -> None:
         if coll.name in self.model:
             self.api.delete_collection(name=coll.name)
             self.delete_from_model(coll.name)
@@ -85,7 +89,7 @@ def delete_coll(self, coll: strategies.Collection) -> None:
     @rule()
     def list_collections(self) -> None:
         colls = self.api.list_collections()
-        assert len(colls) == len([c for c in self.model if not c.startswith("__id__")])
+        assert len(colls) == len(self.model)
         for c in colls:
             assert c.name in self.model
 
@@ -119,9 +123,9 @@ def list_collections_with_limit_offset(self, limit: int, offset: int) -> None:
     )
     def get_or_create_coll(
         self,
-        coll: strategies.Collection,
+        coll: strategies.ExternalCollection,
         new_metadata: Optional[types.Metadata],
-    ) -> MultipleResults[strategies.Collection]:
+    ) -> MultipleResults[strategies.ExternalCollection]:
         # Cases for get_or_create
 
         # Case 0
@@ -163,7 +167,7 @@ def get_or_create_coll(
             coll.metadata = (
                 self.model[coll.name] if new_metadata is None else new_metadata
             )
-        self.set_model(coll.name, coll.metadata, str(coll.id))
+        self.set_model(coll.name, coll.metadata)
 
         # Update API
         c = self.api.get_or_create_collection(
@@ -185,20 +189,19 @@ def get_or_create_coll(
     )
     def modify_coll(
         self,
-        coll: strategies.Collection,
+        coll: strategies.ExternalCollection,
         new_metadata: types.Metadata,
         new_name: Optional[str],
-    ) -> MultipleResults[strategies.Collection]:
-        # early exit if a col with name exists but with diff id, possibly in another tenant/db
-        if coll.name in self.model and f"__id__:{coll.id}" not in self.model:
-            return multiple()
+    ) -> MultipleResults[strategies.ExternalCollection]:
         if coll.name not in self.model:
             with pytest.raises(Exception):
                 c = self.api.get_collection(name=coll.name)
             return multiple()
 
         c = self.api.get_collection(name=coll.name)
-        _metadata: Optional[Mapping[str, Any]] = coll.metadata
+        print("before API:", c)
+        print("model:", self.model)
+        _metadata: Optional[Mapping[str, Any]] = self.model[coll.name]
         _name: str = coll.name
         if new_metadata is not None:
             if len(new_metadata) == 0:
@@ -221,10 +224,12 @@ def modify_coll(
             self.delete_from_model(coll.name)
             coll.name = new_name
             _name = new_name
-        self.set_model(_name, _metadata, str(coll.id))
 
+        self.set_model(_name, _metadata)
         c.modify(metadata=_metadata, name=_name)
         c = self.api.get_collection(name=coll.name)
+        print("after API:", c)
+        print("model:", self.model)
 
         assert c.name == coll.name
         assert c.metadata == self.model[coll.name]
@@ -234,18 +239,13 @@ def set_model(
         self,
         name: str,
         metadata: Optional[types.CollectionMetadata],
-        id: Optional[str] = None,
     ) -> None:
         model = self.model
         model[name] = metadata
-        if id is not None:
-            model[f"__id__:{id}"] = metadata
 
-    def delete_from_model(self, name: str, id: Optional[str] = None) -> None:
+    def delete_from_model(self, name: str) -> None:
         model = self.model
         del model[name]
-        if id is not None:
-            del model[f"__id__:{id}"]
 
     @property
     def model(self) -> Dict[str, Optional[types.CollectionMetadata]]:
@@ -255,3 +255,87 @@ def model(self) -> Dict[str, Optional[types.CollectionMetadata]]:
 def test_collections(caplog: pytest.LogCaptureFixture, api: ClientAPI) -> None:
     caplog.set_level(logging.ERROR)
     run_state_machine_as_test(lambda: CollectionStateMachine(api))  # type: ignore
+
+
+# Below are tests that have failed in the past. If your test fails, please add
+# it to protect against regressions in the test harness itself. If you need
+# help doing so, talk to ben.
+
+
+def test_previously_failing_one(api: ClientAPI) -> None:
+    state = CollectionStateMachine(api)
+    state.initialize()
+    # I don't know why the typechecker is red here. This code is correct and is
+    # pulled from the logs.
+    (v1,) = state.get_or_create_coll(
+        coll=strategies.ExternalCollection(
+            name='jjn2yjLW1zp2T\n',
+            metadata=None,
+            embedding_function=hashing_embedding_function(dtype=numpy.float32, dim=863)
+        ),
+        new_metadata=None
+    )
+    (v6,) = state.get_or_create_coll(
+        coll=strategies.ExternalCollection(
+            name='jjn2yjLW1zp2T\n',
+            metadata=None,
+            embedding_function=hashing_embedding_function(dtype=numpy.float32, dim=863)
+        ),
+        new_metadata=None
+    )
+    state.modify_coll(
+        coll=v1,
+        new_metadata={'7': -1281, 'fGe': -0.0, 'K5j': 'im'},
+        new_name=None
+    )
+    state.modify_coll(
+        coll=v6,
+        new_metadata=None,
+        new_name=None
+    )
+
+
+# https://github.com/chroma-core/chroma/commit/cf476d70f0cebb7c87cb30c7172ba74d6ea175cd#diff-e81868b665d149bb315d86890dea6fc6a9fc9fc9ea3089aa7728142b54f622c5R210
+def test_previously_failing_two(api: ClientAPI) -> None:
+    state = CollectionStateMachine(api)
+    state.initialize()
+    (v13,) = state.get_or_create_coll(
+        coll=strategies.ExternalCollection(
+            name='C1030',
+            metadata={},
+            embedding_function=hashing_embedding_function(dim=2, dtype=numpy.float32)
+        ),
+        new_metadata=None
+    )
+    (v15,) = state.modify_coll(
+        coll=v13,
+        new_metadata={'0': '10', '40': '0', 'p1nviWeL7fO': 'qN', '7b': 'YS', 'VYWq4LEMWjCo': True},
+        new_name='OF5F0MzbQg\n'
+    )
+    state.get_or_create_coll(
+        coll=strategies.ExternalCollection(
+            name='VS0QGh',
+            metadata={'h': 5.681951615025145e-227, 'A1': 61126, 'uhUhLEEMfeC_kN': 2147483647, 'weF': 'pSP', 'B3DSaP': False, '6H533K': 1.192092896e-07},
+            embedding_function=hashing_embedding_function(dim=1915, dtype=numpy.float32)
+        ),
+        new_metadata={'xVW09xUpDZA': 31734,
+                      'g': 1.1,
+                      'n1dUTalF-MY': -1000000.0,
+                      'y': 'G3EtXTZ',
+                      'ugXZ_hK': 5494
+                      }
+    )
+    (v17,) = state.modify_coll(
+        coll=v15,
+        new_metadata={'L35J2S': 'K0l026'},
+        new_name='Ai1\n'
+    )
+    (v18,) = state.get_or_create_coll(coll=v13, new_metadata=None)
+    state.get_or_create_coll(
+        coll=strategies.ExternalCollection(
+            name='VS0QGh',
+            metadata=None,
+            embedding_function=hashing_embedding_function(dim=326, dtype=numpy.float16)
+        ),
+        new_metadata=None
+    )
\ No newline at end of file