Skip to content

Commit cf476d7

Browse files
authored
[BUG]: Fixed test_collections.py property test (#1716)
Needed to fix the failing property tests in #1715 ## Description of changes *Summarize the changes made by this PR.* - Improvements & Bug fixes - Moved the model update after conditional checks for new_name and metadata. - New functionality - ... ## Test plan *How are these changes tested?* - [ ] Tests pass locally with `pytest` for python, `yarn test` for js ## Documentation Changes Failure logs + Error analysis: ``` > assert c.metadata == self.model[coll.name] E AssertionError: assert {'g': 1.1, 'n...': 31734, ...} == {'3': 'd71IL'...235e-208, ...} E E Left contains 5 more items: E {'g': 1.1, E 'n1dUTalF-MY': -1000000.0, E 'ugXZ_hK': 5494, E 'xVW09xUpDZA': 31734, E 'y': 'G3EtXTZ'} E Right contains 9 more items: E {'3': 'd71IL', E '45227B': '65', E '7DjCkbusc-K': 'vc94', E '8-tD9nJd': 4.8728578364902235e-208, E 'Bpyj': -675165.8688164671, E 'Uy6KZu6abCD9Z': -72, E 'giC': -6.103515625e-05, E 'pO4': -0.0, E 'r3': -41479} E E Full diff: E { E + 'g': 1.1, E + 'n1dUTalF-MY': -1000000.0, E + 'ugXZ_hK': 5494, E + 'xVW09xUpDZA': 31734, E + 'y': 'G3EtXTZ', E - '3': 'd71IL', E - '45227B': '65', E - '7DjCkbusc-K': 'vc94', E - '8-tD9nJd': 4.8728578364902235e-208, E - 'Bpyj': -675165.8688164671, E - 'Uy6KZu6abCD9Z': -72, E - 'giC': -6.103515625e-05, E - 'pO4': -0.0, E - 'r3': -41479, E } E Falsifying example: E state = CollectionStateMachine() E state.initialize() E state.list_collections_with_limit_offset(limit=5, offset=0) E state.list_collections_with_limit_offset(limit=4, offset=5) E (v1,) = state.get_or_create_coll(coll=Collection(name='E60V1ekr9eDcL\n', id=UUID('4435abf2-9fc6-4d5a-bb7b-33177a956d44'), metadata={'_m5jalwo': -228}, dimension=1356, dtype=<class 'numpy.float64'>, topic='topic', known_metadata_keys={}, known_document_keywords=[], has_documents=False, has_embeddings=True, embedding_function=<chromadb.test.property.strategies.hashing_embedding_function object at 0x7fd181bb0590>), new_metadata={'k5o6Q': 'Op', E 'LP': -5.960464477539063e-08, E 'pzHdzczVCn': '81', E '7': False, E 'e4Lz': 999999.0, E '206': False}) E (v2,) = state.get_or_create_coll(coll=v1, new_metadata=None) E (v3,) = state.get_or_create_coll(coll=v1, new_metadata={'4OQN': -2097032423, E 'cW': -0.99999, E 'o6wq3': -147, E 'M8j3KBU': -2.2250738585072014e-308, E 'D8nZrA0': 252, E 'up4P_': 34761, E 'L_win': -6.103515625e-05, E '5kt': '_q', E 'UybO2dJF4': -0.3333333333333333, E 'NfQ83VsmI': 'Qpy', E 'fk': -1.192092896e-07, E 'J1ck': 'ozL'}) E (v4,) = state.get_or_create_coll(coll=Collection(name='nOeHg-OXVl', id=UUID('9c28b027-9f22-409c-b3fd-c5de03b60018'), metadata=None, dimension=1009, dtype=<class 'numpy.float16'>, topic='topic', known_metadata_keys={}, known_document_keywords=[], has_documents=True, has_embeddings=True, embedding_function=<chromadb.test.property.strategies.hashing_embedding_function object at 0x7fd181bdfe50>), new_metadata={'p4isW': 'k8l', E 'k2tFn3v1E': True, E 'R': 'ji-2d5lDGV', E 'K5vdi': False, E 'TZs': False, E 'OgJ_DZ2j': False, E 'ovZjD3': -64297, E '9p': True, E '32f3nw8h2d54LPCzsV': 1733994327, E '4P': 2.896381722565434e-121}) E state.list_collections_with_limit_offset(limit=2, offset=0) E state.list_collections_with_limit_offset(limit=3, offset=0) E state.list_collections_with_limit_offset(limit=5, offset=5) E (v5,) = state.modify_coll(coll=v4, new_metadata=None, new_name=None) E (v6,) = state.get_or_create_coll(coll=Collection(name='A1w5m1l5I\n', id=UUID('606d59a6-6f66-456d-81ca-a8ea029c318c'), metadata={'3': '6Y'}, dimension=1544, dtype=<class 'numpy.float16'>, topic='topic', known_metadata_keys={}, known_document_keywords=[], has_documents=False, has_embeddings=True, embedding_function=<chromadb.test.property.strategies.hashing_embedding_function object at 0x7fd181d6d110>), new_metadata=None) E (v7,) = state.get_or_create_coll(coll=v4, new_metadata={'01316': -0.0, '14UwVu': 81, 'C9eMDDdnB0oy': False, 'n964': '0a'}) E state.modify_coll(coll=v7, new_metadata={}, new_name='B-5Z2m2j52121') E state.get_or_create_coll(coll=Collection(name='E31\n', id=UUID('e67426e8-8595-4916-92a6-b2777b52f157'), metadata={'0Kr5Wp': -769, '9xT': 143980.04500299558, '8': True}, dimension=1800, dtype=<class 'numpy.float16'>, topic='topic', known_metadata_keys={}, known_document_keywords=[], has_documents=True, has_embeddings=True, embedding_function=<chromadb.test.property.strategies.hashing_embedding_function object at 0x7fd181d6d6d0>), new_metadata={}) E state.list_collections_with_limit_offset(limit=2, offset=1) E state.list_collections_with_limit_offset(limit=2, offset=0) E state.list_collections_with_limit_offset(limit=1, offset=0) E state.list_collections_with_limit_offset(limit=1, offset=1) E (v8,) = state.get_or_create_coll(coll=Collection(name='A00\n', id=UUID('01522a4f-3383-4a58-8b18-0418e38e3ec6'), metadata=None, dimension=1032, dtype=<class 'numpy.float16'>, topic='topic', known_metadata_keys={}, known_document_keywords=[], has_documents=False, has_embeddings=True, embedding_function=<chromadb.test.property.strategies.hashing_embedding_function object at 0x7fd181d94bd0>), new_metadata=None) E (v9,) = state.get_or_create_coll(coll=v6, new_metadata=None) E state.list_collections_with_limit_offset(limit=3, offset=2) E (v10,) = state.modify_coll(coll=v3, new_metadata=None, new_name=None) E (v11,) = state.modify_coll(coll=v10, new_metadata=None, new_name=None) E state.modify_coll(coll=v9, new_metadata={}, new_name=None) E (v12,) = state.get_or_create_coll(coll=Collection(name='A10\n', id=UUID('01efb806-fffa-4ce6-b285-b9aae55f50af'), metadata={}, dimension=258, dtype=<class 'numpy.float16'>, topic='topic', known_metadata_keys={}, known_document_keywords=[], has_documents=False, has_embeddings=True, embedding_function=<chromadb.test.property.strategies.hashing_embedding_function object at 0x7fd183bbe5d0>), new_metadata=None) E state.modify_coll(coll=v11, new_metadata={}, new_name='A01011110\n') E state.list_collections_with_limit_offset(limit=3, offset=1) ------ Problem start here ------ E (v13,) = state.get_or_create_coll(coll=Collection(name='C1030', id=UUID('7858d028-1295-4769-96c1-e58bf242b7bd'), metadata={}, dimension=2, dtype=<class 'numpy.float32'>, topic='topic', known_metadata_keys={}, known_document_keywords=[], has_documents=False, has_embeddings=True, embedding_function=<chromadb.test.property.strategies.hashing_embedding_function object at 0x7fd181bbff10>), new_metadata=None) E (v14,) = state.get_or_create_coll(coll=Collection(name='A01200671\n', id=UUID('f77d01a4-e43f-4b17-9579-daadccad2f71'), metadata={'0': 'L', '01': -4}, dimension=1282, dtype=<class 'numpy.float32'>, topic='topic', known_metadata_keys={}, known_document_keywords=[], has_documents=True, has_embeddings=False, embedding_function=<chromadb.test.property.strategies.hashing_embedding_function object at 0x7fd181d5a9d0>), new_metadata=None) E state.list_collections_with_limit_offset(limit=2, offset=1) E (v15,) = state.modify_coll(coll=v13, new_metadata={'0': '10', '40': '0', 'p1nviWeL7fO': 'qN', '7b': 'YS', 'VYWq4LEMWjCo': True}, new_name='OF5F0MzbQg\n') E (v16,) = state.get_or_create_coll(coll=Collection(name='VS0QGh', id=UUID('c6b85c1d-c3e9-4d37-b9ca-c4b4266193e9'), metadata={'h': 5.681951615025145e-227, 'A1': 61126, 'uhUhLEEMfeC_kN': 2147483647, 'weF': 'pSP', 'B3DSaP': False, '6H533K': 1.192092896e-07}, dimension=1915, dtype=<class 'numpy.float32'>, topic='topic', known_metadata_keys={}, known_document_keywords=[], has_documents=False, has_embeddings=True, embedding_function=<chromadb.test.property.strategies.hashing_embedding_function object at 0x7fd181d202d0>), new_metadata={'xVW09xUpDZA': 31734, E 'g': 1.1, E 'n1dUTalF-MY': -1000000.0, E 'y': 'G3EtXTZ', E 'ugXZ_hK': 5494}) E state.list_collections_with_limit_offset(limit=4, offset=5) E state.modify_coll(coll=v16, new_metadata={'giC': -6.103515625e-05, E '45227B': '65', E 'Uy6KZu6abCD9Z': -72, E 'r3': -41479, E 'pO4': -0.0, E 'Bpyj': -675165.8688164671, E '8-tD9nJd': 4.8728578364902235e-208, E '7DjCkbusc-K': 'vc94', E '3': 'd71IL'}, new_name='OF5F0MzbQg\n') E state.list_collections_with_limit_offset(limit=4, offset=4) E (v17,) = state.modify_coll(coll=v15, new_metadata={'L35J2S': 'K0l026'}, new_name='Ai1\n') E (v18,) = state.get_or_create_coll(coll=v13, new_metadata=None) E state.list_collections_with_limit_offset(limit=3, offset=1) E (v19,) = state.modify_coll(coll=v14, new_metadata=None, new_name='F0K570\n') E (v20,) = state.get_or_create_coll(coll=Collection(name='Ad5m003\n', id=UUID('5e23b560-7f62-4f14-bf80-93f5ff4e906a'), metadata={'3M': 'q_'}, dimension=57, dtype=<class 'numpy.float16'>, topic='topic', known_metadata_keys={}, known_document_keywords=[], has_documents=True, has_embeddings=False, embedding_function=<chromadb.test.property.strategies.hashing_embedding_function object at 0x7fd181d5aad0>), new_metadata={'_000': 852410}) E (v21,) = state.get_or_create_coll(coll=v14, new_metadata=None) E state.list_collections_with_limit_offset(limit=4, offset=1) E (v22,) = state.modify_coll(coll=v21, new_metadata=None, new_name=None) E (v23,) = state.modify_coll(coll=v22, new_metadata=None, new_name=None) E state.list_collections_with_limit_offset(limit=1, offset=1) E state.get_or_create_coll(coll=Collection(name='VS0QGh', id=UUID('ca92837d-3425-436c-bf11-dba969f0f8c7'), metadata=None, dimension=326, dtype=<class 'numpy.float16'>, topic='topic', known_metadata_keys={}, known_document_keywords=[], has_documents=True, has_embeddings=False, embedding_function=<chromadb.test.property.strategies.hashing_embedding_function object at 0x7fd181d6f4d0>), new_metadata=None) E state.teardown() ``` The problem starts in v13 where we create a new collection named `C1030` In v15 we modify the collection `C1030` and rename it to `OF5F0MzbQg\n` In v16 we create a new collection named `VS0QGh` We try to modify the collection `VS0QGh` and rename it to `OF5F0MzbQg\n` which is the same name as the collection `C1030` which is fails in the and we return empty from the rule. However we have already updated the model: ```python if new_metadata is not None: if len(new_metadata) == 0: with pytest.raises(Exception): c = self.api.get_or_create_collection( name=coll.name, metadata=new_metadata, embedding_function=coll.embedding_function, ) return multiple() coll.metadata = new_metadata self.set_model(coll.name, coll.metadata) # <--- here we update the metadata if new_name is not None: if new_name in self.model and new_name != coll.name: with pytest.raises(Exception): # <--- fail here to rename the collection to `OF5F0MzbQg\n` c.modify(metadata=new_metadata, name=new_name) return multiple() prev_metadata = self.model[coll.name] self.delete_from_model(coll.name) self.set_model(new_name, prev_metadata) coll.name = new_name ``` then in `E state.get_or_create_coll(coll=Collection(name='VS0QGh', id=UUID('ca92837d-3425-436c-bf11-dba969f0f8c7'), metadata=None, dimension=326, dtype=<class 'numpy.float16'>, topic='topic', known_metadata_keys={}, known_document_keywords=[], has_documents=True, has_embeddings=False, embedding_function=<chromadb.test.property.strategies.hashing_embedding_function object at 0x7fd181d6f4d0>), new_metadata=None)` We try to create or get collection `VS0QGh` which exists in API and in state. Metadata and new metadata are None so we fall into case 0. Existing collection with old metadata and but we take the metadata from model which has been updated after the failure above. So we have API version of the metadata and partly updated model metadata, which causes the failure.
1 parent 93194c8 commit cf476d7

File tree

1 file changed

+22
-11
lines changed

1 file changed

+22
-11
lines changed

chromadb/test/property/test_collections.py

Lines changed: 22 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -14,7 +14,7 @@
1414
run_state_machine_as_test,
1515
MultipleResults,
1616
)
17-
from typing import Dict, Optional
17+
from typing import Dict, Optional, Any, Mapping
1818

1919

2020
class CollectionStateMachine(RuleBasedStateMachine):
@@ -54,7 +54,7 @@ def create_coll(
5454
metadata=coll.metadata,
5555
embedding_function=coll.embedding_function,
5656
)
57-
self.set_model(coll.name, coll.metadata)
57+
self.set_model(coll.name, coll.metadata, str(coll.id))
5858

5959
assert c.name == coll.name
6060
assert c.metadata == self.model[coll.name]
@@ -85,7 +85,7 @@ def delete_coll(self, coll: strategies.Collection) -> None:
8585
@rule()
8686
def list_collections(self) -> None:
8787
colls = self.api.list_collections()
88-
assert len(colls) == len(self.model)
88+
assert len(colls) == len([c for c in self.model if not c.startswith("__id__")])
8989
for c in colls:
9090
assert c.name in self.model
9191

@@ -163,7 +163,7 @@ def get_or_create_coll(
163163
coll.metadata = (
164164
self.model[coll.name] if new_metadata is None else new_metadata
165165
)
166-
self.set_model(coll.name, coll.metadata)
166+
self.set_model(coll.name, coll.metadata, str(coll.id))
167167

168168
# Update API
169169
c = self.api.get_or_create_collection(
@@ -189,13 +189,17 @@ def modify_coll(
189189
new_metadata: types.Metadata,
190190
new_name: Optional[str],
191191
) -> MultipleResults[strategies.Collection]:
192+
# early exit if a col with name exists but with diff id, possibly in another tenant/db
193+
if coll.name in self.model and f"__id__:{coll.id}" not in self.model:
194+
return multiple()
192195
if coll.name not in self.model:
193196
with pytest.raises(Exception):
194197
c = self.api.get_collection(name=coll.name)
195198
return multiple()
196199

197200
c = self.api.get_collection(name=coll.name)
198-
201+
_metadata: Optional[Mapping[str, Any]] = coll.metadata
202+
_name: str = coll.name
199203
if new_metadata is not None:
200204
if len(new_metadata) == 0:
201205
with pytest.raises(Exception):
@@ -206,35 +210,42 @@ def modify_coll(
206210
)
207211
return multiple()
208212
coll.metadata = new_metadata
209-
self.set_model(coll.name, coll.metadata)
213+
_metadata = new_metadata
210214

211215
if new_name is not None:
212216
if new_name in self.model and new_name != coll.name:
213217
with pytest.raises(Exception):
214218
c.modify(metadata=new_metadata, name=new_name)
215219
return multiple()
216220

217-
prev_metadata = self.model[coll.name]
218221
self.delete_from_model(coll.name)
219-
self.set_model(new_name, prev_metadata)
220222
coll.name = new_name
223+
_name = new_name
224+
self.set_model(_name, _metadata, str(coll.id))
221225

222-
c.modify(metadata=new_metadata, name=new_name)
226+
c.modify(metadata=_metadata, name=_name)
223227
c = self.api.get_collection(name=coll.name)
224228

225229
assert c.name == coll.name
226230
assert c.metadata == self.model[coll.name]
227231
return multiple(coll)
228232

229233
def set_model(
230-
self, name: str, metadata: Optional[types.CollectionMetadata]
234+
self,
235+
name: str,
236+
metadata: Optional[types.CollectionMetadata],
237+
id: Optional[str] = None,
231238
) -> None:
232239
model = self.model
233240
model[name] = metadata
241+
if id is not None:
242+
model[f"__id__:{id}"] = metadata
234243

235-
def delete_from_model(self, name: str) -> None:
244+
def delete_from_model(self, name: str, id: Optional[str] = None) -> None:
236245
model = self.model
237246
del model[name]
247+
if id is not None:
248+
del model[f"__id__:{id}"]
238249

239250
@property
240251
def model(self) -> Dict[str, Optional[types.CollectionMetadata]]:

0 commit comments

Comments
 (0)