Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -1578,13 +1578,14 @@ def get_default_sparse_query_encoder(

def _detect_vector_format(self, collection_name: str) -> None:
"""
Detect the vector format of an existing collection.
This allows backward compatibility with collections that were created before
the refactoring to use named vectors consistently.
Detect and handle old vector formats from existing collections.
- named vs non-named vectors
- new sparse vector field name vs old sparse vector field name
"""
try:
collection_info = self._client.get_collection(collection_name)
vectors_config = collection_info.config.params.vectors
sparse_vectors = collection_info.config.params.sparse_vectors or {}

# Check if we have an unnamed vector format (where name is empty string)
if isinstance(vectors_config, dict):
Expand All @@ -1597,18 +1598,28 @@ def _detect_vector_format(self, collection_name: str) -> None:
self._legacy_vector_format = True
self.dense_vector_name = LEGACY_UNNAMED_VECTOR

# Detect sparse vector name if any sparse vectors configured
if isinstance(sparse_vectors, dict) and len(sparse_vectors) > 0:
if self.sparse_vector_name in sparse_vectors:
pass
elif DEFAULT_SPARSE_VECTOR_NAME_OLD in sparse_vectors:
self.sparse_vector_name = DEFAULT_SPARSE_VECTOR_NAME_OLD

except Exception as e:
logger.warning(
f"Could not detect vector format for collection {collection_name}: {e}"
)

async def _adetect_vector_format(self, collection_name: str) -> None:
"""
Asynchronous method to detect the vector format of an existing collection.
Asynchronous method to detect and handle old vector formats from existing collections.
- named vs non-named vectors
- new sparse vector field name vs old sparse vector field name
"""
try:
collection_info = await self._aclient.get_collection(collection_name)
vectors_config = collection_info.config.params.vectors
sparse_vectors = collection_info.config.params.sparse_vectors or {}

# Check if we have an unnamed vector format (where name is empty string)
if isinstance(vectors_config, dict):
Expand All @@ -1621,6 +1632,13 @@ async def _adetect_vector_format(self, collection_name: str) -> None:
self._legacy_vector_format = True
self.dense_vector_name = LEGACY_UNNAMED_VECTOR

# Detect sparse vector name if any sparse vectors configured
if isinstance(sparse_vectors, dict) and len(sparse_vectors) > 0:
if self.sparse_vector_name in sparse_vectors:
pass
elif DEFAULT_SPARSE_VECTOR_NAME_OLD in sparse_vectors:
self.sparse_vector_name = DEFAULT_SPARSE_VECTOR_NAME_OLD

except Exception as e:
logger.warning(
f"Could not detect vector format for collection {collection_name}: {e}"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ dev = [

[project]
name = "llama-index-vector-stores-qdrant"
version = "0.8.4"
version = "0.8.5"
description = "llama-index vector_stores qdrant integration"
authors = [{name = "Your Name", email = "[email protected]"}]
requires-python = ">=3.9,<3.14"
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,8 @@
FilterCondition,
FilterOperator,
)
from qdrant_client import AsyncQdrantClient
from qdrant_client.http import models as qmodels

requires_qdrant_cluster = pytest.mark.skipif(
not os.getenv("QDRANT_CLUSTER_URL"),
Expand Down Expand Up @@ -694,3 +696,125 @@ def test_create_payload_indexes_returns_early_when_no_payload_indexes(
vector_store: QdrantVectorStore,
):
vector_store._create_payload_indexes()


def test_sparse_vector_name_detection_switches_to_legacy() -> None:
    """
    A collection exposing only the legacy sparse vector name makes the store adopt it.

    The Qdrant client is mocked; ``get_collection`` returns a minimal stand-in
    that carries just ``config.params.vectors`` and
    ``config.params.sparse_vectors``.
    """

    class FakeParams:
        vectors = {"text-dense": object()}
        # Only the legacy sparse field is configured on this collection.
        sparse_vectors = {"text-sparse": object()}

    class FakeConfig:
        params = FakeParams()

    class FakeCollectionInfo:
        config = FakeConfig()

    client = MagicMock(spec=QdrantClient)
    client.collection_exists.return_value = True
    client.get_collection.return_value = FakeCollectionInfo()

    store = QdrantVectorStore(client=client, collection_name="test_collection")

    assert store.sparse_vector_name == "text-sparse"


def test_sparse_vector_name_detection_keeps_new() -> None:
    """
    A collection exposing only the new sparse vector name leaves the default in place.

    The Qdrant client is mocked; ``get_collection`` returns a minimal stand-in
    that carries just ``config.params.vectors`` and
    ``config.params.sparse_vectors``.
    """

    class FakeParams:
        vectors = {"text-dense": object()}
        # Only the new sparse field is configured on this collection.
        sparse_vectors = {"text-sparse-new": object()}

    class FakeConfig:
        params = FakeParams()

    class FakeCollectionInfo:
        config = FakeConfig()

    client = MagicMock(spec=QdrantClient)
    client.collection_exists.return_value = True
    client.get_collection.return_value = FakeCollectionInfo()

    store = QdrantVectorStore(client=client, collection_name="test_collection")

    assert store.sparse_vector_name == "text-sparse-new"


def test_sparse_vector_name_respects_user_specified() -> None:
    """
    A caller-supplied sparse vector name that exists in the collection is kept as-is.

    The mocked collection offers both the custom name and the new default name,
    so the assertion checks that detection does not replace the explicit choice.
    """
    requested_name = "custom-sparse"

    class FakeParams:
        vectors = {"text-dense": object()}
        sparse_vectors = {
            "custom-sparse": object(),
            "text-sparse-new": object(),
        }

    class FakeConfig:
        params = FakeParams()

    class FakeCollectionInfo:
        config = FakeConfig()

    client = MagicMock(spec=QdrantClient)
    client.collection_exists.return_value = True
    client.get_collection.return_value = FakeCollectionInfo()

    store = QdrantVectorStore(
        sparse_vector_name=requested_name,
        client=client,
        collection_name="test_collection",
    )

    assert store.sparse_vector_name == requested_name


@pytest.mark.asyncio
async def test_async_query_initializes_with_async_client_only() -> None:
    """
    When only an async client is provided and the collection already exists,
    aquery should lazily detect vector format and successfully return results.

    The in-memory async client is always closed at the end of the test, even
    when an assertion fails, so it does not leak into other tests.
    """
    collection_name = "async_init_test"
    aclient = AsyncQdrantClient(":memory:")

    try:
        # Create collection with named dense vector
        await aclient.create_collection(
            collection_name=collection_name,
            vectors_config={
                "text-dense": qmodels.VectorParams(
                    size=2, distance=qmodels.Distance.COSINE
                )
            },
        )

        # Insert a single point
        await aclient.upsert(
            collection_name=collection_name,
            points=[
                qmodels.PointStruct(
                    id="11111111-1111-1111-1111-111111111111",
                    vector={"text-dense": [1.0, 0.0]},
                    payload={"text": "hello"},
                )
            ],
        )

        # Initialize store with async client only
        store = QdrantVectorStore(collection_name=collection_name, aclient=aclient)

        query = VectorStoreQuery(query_embedding=[1.0, 0.0], similarity_top_k=1)
        result = await store.aquery(query)

        assert result is not None
        assert len(result.nodes) == 1
        assert getattr(result.nodes[0], "text", None) == "hello"
    finally:
        # Release the client regardless of the test outcome.
        await aclient.close()

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading