From dec98fe28f26de4a63fdbd27af4bbd17e1e0f0d4 Mon Sep 17 00:00:00 2001 From: Sashin Date: Thu, 23 Nov 2023 22:18:08 +0200 Subject: [PATCH 1/5] Initiaal push --- pyproject.toml | 2 ++ swarms/memory/qdrant.py | 59 +++++++++++++++++++++++++++++++++-- swarms/memory/qdrant/usage.py | 18 +++++++++++ 3 files changed, 76 insertions(+), 3 deletions(-) create mode 100644 swarms/memory/qdrant/usage.py diff --git a/pyproject.toml b/pyproject.toml index 075bbd15..b9b0f89a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -23,7 +23,9 @@ classifiers = [ [tool.poetry.dependencies] python = "^3.8.1" +sentence_transformers = "*" transformers = "*" +qdrant_client = "*" openai = "0.28.0" langchain = "*" asyncio = "*" diff --git a/swarms/memory/qdrant.py b/swarms/memory/qdrant.py index 7bc5018e..24f0dc82 100644 --- a/swarms/memory/qdrant.py +++ b/swarms/memory/qdrant.py @@ -1,6 +1,59 @@ -""" -QDRANT MEMORY CLASS +from httpx import RequestError +from qdrant_client import QdrantClient +from qdrant_client.http.models import Distance, VectorParams, PointStruct +from sentence_transformers import SentenceTransformer +class Qdrant: + def __init__(self,api_key, host, port=6333, collection_name="qdrant", model_name="BAAI/bge-small-en-v1.5", https=True ): + self.client = QdrantClient(url=host, port=port, api_key=api_key) #, port=port, api_key=api_key, https=False + self.collection_name = collection_name + self._load_embedding_model(model_name) + self._setup_collection() + def _load_embedding_model(self, model_name): + # Load the embedding model + self.model = SentenceTransformer(model_name) + def _setup_collection(self): + # Check if the collection already exists + try: + exists = self.client.get_collection(self.collection_name) + return + except Exception: + # Collection does not exist, create it + self.client.create_collection( + collection_name=self.collection_name, + vectors_config=VectorParams(size=self.model.get_sentence_embedding_dimension(), distance=Distance.DOT), + ) + print(f"Collection '{self.collection_name}' created.") + else: + print(f"Collection '{self.collection_name}' already exists.") -""" + def add_vectors(self, docs): + # Add vectors with payloads to the collection + points = [] + for i, doc in enumerate(docs): + if doc.page_content: + embedding = self.model.encode(doc.page_content, normalize_embeddings=True) + points.append(PointStruct(id=i + 1, vector=embedding, payload={"content":doc.page_content})) + else: + print(f"Document at index {i} is missing 'text' or 'payload' key") + + operation_info = self.client.upsert( + collection_name=self.collection_name, + wait=True, + points=points, + ) + print(operation_info) + def search_vectors(self, query, limit=3): + query_vector= self.model.encode(query, normalize_embeddings=True) + # Search for similar vectors + search_result = self.client.search( + collection_name=self.collection_name, + query_vector=query_vector, + limit=limit + ) + return search_result + + + +#TODO, use kwargs in constructor, have search result be text \ No newline at end of file diff --git a/swarms/memory/qdrant/usage.py b/swarms/memory/qdrant/usage.py new file mode 100644 index 00000000..0378d540 --- /dev/null +++ b/swarms/memory/qdrant/usage.py @@ -0,0 +1,18 @@ +from langchain.document_loaders import CSVLoader +from swarms.memory import qdrant + +loader = CSVLoader(file_path="../document_parsing/aipg/aipg.csv", encoding='utf-8-sig') +docs = loader.load() + + +# Initialize the Qdrant instance +# See qdrant documentation on how to run locally +qdrant_client = qdrant.Qdrant(host ="https://697ea26c-2881-4e17-8af4-817fcb5862e8.europe-west3-0.gcp.cloud.qdrant.io", collection_name="qdrant", api_key="BhG2_yINqNU-aKovSEBadn69Zszhbo5uaqdJ6G_qDkdySjAljvuPqQ") +qdrant_client.add_vectors(docs) + +# Perform a search +search_query = "Who is jojo" +search_results = qdrant_client.search_vectors(search_query) +print("Search Results:") +for result in search_results: + print(result) From d985fdad0022b80e191302aa20062e07b1474e2e Mon Sep 17 00:00:00 2001 From: Sashin Date: Fri, 24 Nov 2023 00:31:08 +0200 Subject: [PATCH 2/5] Initiaal push --- {swarms => playground}/memory/qdrant/usage.py | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename {swarms => playground}/memory/qdrant/usage.py (100%) diff --git a/swarms/memory/qdrant/usage.py b/playground/memory/qdrant/usage.py similarity index 100% rename from swarms/memory/qdrant/usage.py rename to playground/memory/qdrant/usage.py From 018a0f437f7bc6a84d231e119584eddce9e30654 Mon Sep 17 00:00:00 2001 From: Sashin Date: Fri, 24 Nov 2023 20:37:17 +0200 Subject: [PATCH 3/5] Initiaal push --- swarms/memory/qdrant.py | 73 ++++++++++++++++++++++------------------- tests/memory/qdrant.py | 40 ++++++++++++++++++++++ 2 files changed, 79 insertions(+), 34 deletions(-) create mode 100644 tests/memory/qdrant.py diff --git a/swarms/memory/qdrant.py b/swarms/memory/qdrant.py index 24f0dc82..c06efeba 100644 --- a/swarms/memory/qdrant.py +++ b/swarms/memory/qdrant.py @@ -9,51 +9,56 @@ class Qdrant: self._load_embedding_model(model_name) self._setup_collection() - def _load_embedding_model(self, model_name): - # Load the embedding model - self.model = SentenceTransformer(model_name) + def _load_embedding_model(self, model_name: str): + try: + self.model = SentenceTransformer(model_name) + except Exception as e: + print(f"Error loading embedding model: {e}") def _setup_collection(self): - # Check if the collection already exists try: exists = self.client.get_collection(self.collection_name) - return - except Exception: - # Collection does not exist, create it + if exists: + print(f"Collection '{self.collection_name}' already exists.") + except Exception as e: self.client.create_collection( collection_name=self.collection_name, vectors_config=VectorParams(size=self.model.get_sentence_embedding_dimension(), distance=Distance.DOT), ) print(f"Collection '{self.collection_name}' created.") - else: - print(f"Collection '{self.collection_name}' already exists.") - def add_vectors(self, docs): - # Add vectors with payloads to the collection + def add_vectors(self, docs: List[dict]): points = [] for i, doc in enumerate(docs): - if doc.page_content: - embedding = self.model.encode(doc.page_content, normalize_embeddings=True) - points.append(PointStruct(id=i + 1, vector=embedding, payload={"content":doc.page_content})) - else: - print(f"Document at index {i} is missing 'text' or 'payload' key") - - operation_info = self.client.upsert( - collection_name=self.collection_name, - wait=True, - points=points, - ) - print(operation_info) - def search_vectors(self, query, limit=3): - query_vector= self.model.encode(query, normalize_embeddings=True) - # Search for similar vectors - search_result = self.client.search( - collection_name=self.collection_name, - query_vector=query_vector, - limit=limit - ) - return search_result - + try: + if 'page_content' in doc: + embedding = self.model.encode(doc['page_content'], normalize_embeddings=True) + points.append(PointStruct(id=i + 1, vector=embedding, payload={"content": doc['page_content']})) + else: + print(f"Document at index {i} is missing 'page_content' key") + except Exception as e: + print(f"Error processing document at index {i}: {e}") + try: + operation_info = self.client.upsert( + collection_name=self.collection_name, + wait=True, + points=points, + ) + return operation_info + except Exception as e: + print(f"Error adding vectors: {e}") + return None -#TODO, use kwargs in constructor, have search result be text \ No newline at end of file + def search_vectors(self, query: str, limit: int = 3): + try: + query_vector = self.model.encode(query, normalize_embeddings=True) + search_result = self.client.search( + collection_name=self.collection_name, + query_vector=query_vector, + limit=limit + ) + return search_result + except Exception as e: + print(f"Error searching vectors: {e}") + return None diff --git a/tests/memory/qdrant.py b/tests/memory/qdrant.py new file mode 100644 index 00000000..577ede2a --- /dev/null +++ b/tests/memory/qdrant.py @@ -0,0 +1,40 @@ +import pytest +from unittest.mock import Mock, patch + +from swarms.memory.qdrant import Qdrant + + +@pytest.fixture +def mock_qdrant_client(): + with patch('your_module.QdrantClient') as MockQdrantClient: + yield MockQdrantClient() + +@pytest.fixture +def mock_sentence_transformer(): + with patch('sentence_transformers.SentenceTransformer') as MockSentenceTransformer: + yield MockSentenceTransformer() + +@pytest.fixture +def qdrant_client(mock_qdrant_client, mock_sentence_transformer): + client = Qdrant(api_key="your_api_key", host="your_host") + yield client + +def test_qdrant_init(qdrant_client, mock_qdrant_client): + assert qdrant_client.client is not None + +def test_load_embedding_model(qdrant_client, mock_sentence_transformer): + qdrant_client._load_embedding_model("model_name") + mock_sentence_transformer.assert_called_once_with("model_name") + +def test_setup_collection(qdrant_client, mock_qdrant_client): + qdrant_client._setup_collection() + mock_qdrant_client.get_collection.assert_called_once_with(qdrant_client.collection_name) + +def test_add_vectors(qdrant_client, mock_qdrant_client): + mock_doc = Mock(page_content="Sample text") + qdrant_client.add_vectors([mock_doc]) + mock_qdrant_client.upsert.assert_called_once() + +def test_search_vectors(qdrant_client, mock_qdrant_client): + qdrant_client.search_vectors("test query") + mock_qdrant_client.search.assert_called_once() From f32443e8afabbcea733ea4b75aeffc6b8d667447 Mon Sep 17 00:00:00 2001 From: Sashin Date: Fri, 24 Nov 2023 20:38:16 +0200 Subject: [PATCH 4/5] Initiaal push --- swarms/memory/qdrant.py | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/swarms/memory/qdrant.py b/swarms/memory/qdrant.py index c06efeba..8680a6d7 100644 --- a/swarms/memory/qdrant.py +++ b/swarms/memory/qdrant.py @@ -1,13 +1,19 @@ +from typing import List +from qdrant_client.http.models import CollectionInfoResponse, OperationResponse, SearchResult +from sentence_transformers import SentenceTransformer from httpx import RequestError from qdrant_client import QdrantClient from qdrant_client.http.models import Distance, VectorParams, PointStruct -from sentence_transformers import SentenceTransformer + class Qdrant: - def __init__(self,api_key, host, port=6333, collection_name="qdrant", model_name="BAAI/bge-small-en-v1.5", https=True ): - self.client = QdrantClient(url=host, port=port, api_key=api_key) #, port=port, api_key=api_key, https=False - self.collection_name = collection_name - self._load_embedding_model(model_name) - self._setup_collection() + def __init__(self, api_key: str, host: str, port: int = 6333, collection_name: str = "qdrant", model_name: str = "BAAI/bge-small-en-v1.5", https: bool = True): + try: + self.client = QdrantClient(url=host, port=port, api_key=api_key) + self.collection_name = collection_name + self._load_embedding_model(model_name) + self._setup_collection() + except RequestError as e: + print(f"Error setting up QdrantClient: {e}") def _load_embedding_model(self, model_name: str): try: From 305d02bd90bd2f937a2119844a18a41de9ac114e Mon Sep 17 00:00:00 2001 From: Sashin Date: Mon, 27 Nov 2023 19:29:32 +0200 Subject: [PATCH 5/5] Initiaal push --- docs/swarms/memory/qdrant.md | 81 ++++++++++++++++++++++++++++++++++++ mkdocs.yml | 1 + swarms/memory/qdrant.py | 42 ++++++++++++++++++- 3 files changed, 123 insertions(+), 1 deletion(-) create mode 100644 docs/swarms/memory/qdrant.md diff --git a/docs/swarms/memory/qdrant.md b/docs/swarms/memory/qdrant.md new file mode 100644 index 00000000..3717d94f --- /dev/null +++ b/docs/swarms/memory/qdrant.md @@ -0,0 +1,81 @@ +# Qdrant Client Library + +## Overview + +The Qdrant Client Library is designed for interacting with the Qdrant vector database, allowing efficient storage and retrieval of high-dimensional vector data. It integrates with machine learning models for embedding and is particularly suited for search and recommendation systems. + +## Installation + +```python +pip install qdrant-client sentence-transformers httpx +``` + +## Class Definition: Qdrant + +```python +class Qdrant: + def __init__(self, api_key: str, host: str, port: int = 6333, collection_name: str = "qdrant", model_name: str = "BAAI/bge-small-en-v1.5", https: bool = True): + ... +``` + +### Constructor Parameters + +| Parameter | Type | Description | Default Value | +|-----------------|---------|--------------------------------------------------|-----------------------| +| api_key | str | API key for authentication. | - | +| host | str | Host address of the Qdrant server. | - | +| port | int | Port number for the Qdrant server. | 6333 | +| collection_name | str | Name of the collection to be used or created. | "qdrant" | +| model_name | str | Name of the sentence transformer model. | "BAAI/bge-small-en-v1.5" | +| https | bool | Flag to use HTTPS for connection. | True | + +### Methods + +#### `_load_embedding_model(model_name: str)` + +Loads the sentence embedding model. + +#### `_setup_collection()` + +Checks if the specified collection exists in Qdrant; if not, creates it. + +#### `add_vectors(docs: List[dict]) -> OperationResponse` + +Adds vectors to the Qdrant collection. + +#### `search_vectors(query: str, limit: int = 3) -> SearchResult` + +Searches the Qdrant collection for vectors similar to the query vector. + +## Usage Examples + +### Example 1: Setting Up the Qdrant Client + +```python +from qdrant_client import Qdrant + +qdrant_client = Qdrant(api_key="your_api_key", host="localhost", port=6333) +``` + +### Example 2: Adding Vectors to a Collection + +```python +documents = [ + {"page_content": "Sample text 1"}, + {"page_content": "Sample text 2"} +] + +operation_info = qdrant_client.add_vectors(documents) +print(operation_info) +``` + +### Example 3: Searching for Vectors + +```python +search_result = qdrant_client.search_vectors("Sample search query") +print(search_result) +``` + +## Further Information + +Refer to the [Qdrant Documentation](https://qdrant.tech/docs) for more details on the Qdrant vector database. diff --git a/mkdocs.yml b/mkdocs.yml index 3a4e6691..e6daefd3 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -101,6 +101,7 @@ nav: - swarms.memory: - PineconeVectorStoreStore: "swarms/memory/pinecone.md" - PGVectorStore: "swarms/memory/pg.md" + - Qdrant: "swarms/memory/qdrant.md" - Guides: - Overview: "examples/index.md" - Agents: diff --git a/swarms/memory/qdrant.py b/swarms/memory/qdrant.py index 8680a6d7..271f7456 100644 --- a/swarms/memory/qdrant.py +++ b/swarms/memory/qdrant.py @@ -1,5 +1,4 @@ from typing import List -from qdrant_client.http.models import CollectionInfoResponse, OperationResponse, SearchResult from sentence_transformers import SentenceTransformer from httpx import RequestError from qdrant_client import QdrantClient @@ -7,6 +6,22 @@ from qdrant_client.http.models import Distance, VectorParams, PointStruct class Qdrant: def __init__(self, api_key: str, host: str, port: int = 6333, collection_name: str = "qdrant", model_name: str = "BAAI/bge-small-en-v1.5", https: bool = True): + """ + Qdrant class for managing collections and performing vector operations using QdrantClient. + + Attributes: + client (QdrantClient): The Qdrant client for interacting with the Qdrant server. + collection_name (str): Name of the collection to be managed in Qdrant. + model (SentenceTransformer): The model used for generating sentence embeddings. + + Args: + api_key (str): API key for authenticating with Qdrant. + host (str): Host address of the Qdrant server. + port (int): Port number of the Qdrant server. Defaults to 6333. + collection_name (str): Name of the collection to be used or created. Defaults to "qdrant". + model_name (str): Name of the model to be used for embeddings. Defaults to "BAAI/bge-small-en-v1.5". + https (bool): Flag to indicate if HTTPS should be used. Defaults to True. + """ try: self.client = QdrantClient(url=host, port=port, api_key=api_key) self.collection_name = collection_name @@ -16,6 +31,12 @@ class Qdrant: print(f"Error setting up QdrantClient: {e}") def _load_embedding_model(self, model_name: str): + """ + Loads the sentence embedding model specified by the model name. + + Args: + model_name (str): The name of the model to load for generating embeddings. + """ try: self.model = SentenceTransformer(model_name) except Exception as e: @@ -34,6 +55,15 @@ class Qdrant: print(f"Collection '{self.collection_name}' created.") def add_vectors(self, docs: List[dict]): + """ + Adds vector representations of documents to the Qdrant collection. + + Args: + docs (List[dict]): A list of documents where each document is a dictionary with at least a 'page_content' key. + + Returns: + OperationResponse or None: Returns the operation information if successful, otherwise None. + """ points = [] for i, doc in enumerate(docs): try: @@ -57,6 +87,16 @@ class Qdrant: return None def search_vectors(self, query: str, limit: int = 3): + """ + Searches the collection for vectors similar to the query vector. + + Args: + query (str): The query string to be converted into a vector and used for searching. + limit (int): The number of search results to return. Defaults to 3. + + Returns: + SearchResult or None: Returns the search results if successful, otherwise None. + """ try: query_vector = self.model.encode(query, normalize_embeddings=True) search_result = self.client.search(