From 305d02bd90bd2f937a2119844a18a41de9ac114e Mon Sep 17 00:00:00 2001 From: Sashin Date: Mon, 27 Nov 2023 19:29:32 +0200 Subject: [PATCH] Initiaal push --- docs/swarms/memory/qdrant.md | 81 ++++++++++++++++++++++++++++++++++++ mkdocs.yml | 1 + swarms/memory/qdrant.py | 42 ++++++++++++++++++- 3 files changed, 123 insertions(+), 1 deletion(-) create mode 100644 docs/swarms/memory/qdrant.md diff --git a/docs/swarms/memory/qdrant.md b/docs/swarms/memory/qdrant.md new file mode 100644 index 00000000..3717d94f --- /dev/null +++ b/docs/swarms/memory/qdrant.md @@ -0,0 +1,81 @@ +# Qdrant Client Library + +## Overview + +The Qdrant Client Library is designed for interacting with the Qdrant vector database, allowing efficient storage and retrieval of high-dimensional vector data. It integrates with machine learning models for embedding and is particularly suited for search and recommendation systems. + +## Installation + +```bash +pip install qdrant-client sentence-transformers httpx +``` + +## Class Definition: Qdrant + +```python +class Qdrant: + def __init__(self, api_key: str, host: str, port: int = 6333, collection_name: str = "qdrant", model_name: str = "BAAI/bge-small-en-v1.5", https: bool = True): + ... +``` + +### Constructor Parameters + +| Parameter | Type | Description | Default Value | +|-----------------|---------|--------------------------------------------------|-----------------------| +| api_key | str | API key for authentication. | - | +| host | str | Host address of the Qdrant server. | - | +| port | int | Port number for the Qdrant server. | 6333 | +| collection_name | str | Name of the collection to be used or created. | "qdrant" | +| model_name | str | Name of the sentence transformer model. | "BAAI/bge-small-en-v1.5" | +| https | bool | Flag to use HTTPS for connection. | True | + +### Methods + +#### `_load_embedding_model(model_name: str)` + +Loads the sentence embedding model. 
+ +#### `_setup_collection()` + +Checks if the specified collection exists in Qdrant; if not, creates it. + +#### `add_vectors(docs: List[dict]) -> OperationResponse` + +Adds vectors to the Qdrant collection. + +#### `search_vectors(query: str, limit: int = 3) -> SearchResult` + +Searches the Qdrant collection for vectors similar to the query vector. + +## Usage Examples + +### Example 1: Setting Up the Qdrant Client + +```python +from swarms.memory.qdrant import Qdrant + +qdrant_client = Qdrant(api_key="your_api_key", host="localhost", port=6333) +``` + +### Example 2: Adding Vectors to a Collection + +```python +documents = [ + {"page_content": "Sample text 1"}, + {"page_content": "Sample text 2"} +] + +operation_info = qdrant_client.add_vectors(documents) +print(operation_info) +``` + +### Example 3: Searching for Vectors + +```python +search_result = qdrant_client.search_vectors("Sample search query") +print(search_result) +``` + +## Further Information + +Refer to the [Qdrant Documentation](https://qdrant.tech/docs) for more details on the Qdrant vector database. 
diff --git a/mkdocs.yml b/mkdocs.yml index 3a4e6691..e6daefd3 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -101,6 +101,7 @@ nav: - swarms.memory: - PineconeVectorStoreStore: "swarms/memory/pinecone.md" - PGVectorStore: "swarms/memory/pg.md" + - Qdrant: "swarms/memory/qdrant.md" - Guides: - Overview: "examples/index.md" - Agents: diff --git a/swarms/memory/qdrant.py b/swarms/memory/qdrant.py index 8680a6d7..271f7456 100644 --- a/swarms/memory/qdrant.py +++ b/swarms/memory/qdrant.py @@ -1,5 +1,4 @@ from typing import List -from qdrant_client.http.models import CollectionInfoResponse, OperationResponse, SearchResult from sentence_transformers import SentenceTransformer from httpx import RequestError from qdrant_client import QdrantClient @@ -7,6 +6,22 @@ from qdrant_client.http.models import Distance, VectorParams, PointStruct class Qdrant: def __init__(self, api_key: str, host: str, port: int = 6333, collection_name: str = "qdrant", model_name: str = "BAAI/bge-small-en-v1.5", https: bool = True): + """ + Qdrant class for managing collections and performing vector operations using QdrantClient. + + Attributes: + client (QdrantClient): The Qdrant client for interacting with the Qdrant server. + collection_name (str): Name of the collection to be managed in Qdrant. + model (SentenceTransformer): The model used for generating sentence embeddings. + + Args: + api_key (str): API key for authenticating with Qdrant. + host (str): Host address of the Qdrant server. + port (int): Port number of the Qdrant server. Defaults to 6333. + collection_name (str): Name of the collection to be used or created. Defaults to "qdrant". + model_name (str): Name of the model to be used for embeddings. Defaults to "BAAI/bge-small-en-v1.5". + https (bool): Flag to indicate if HTTPS should be used. Defaults to True. 
+ """ try: self.client = QdrantClient(url=host, port=port, api_key=api_key) self.collection_name = collection_name @@ -16,6 +31,12 @@ class Qdrant: print(f"Error setting up QdrantClient: {e}") def _load_embedding_model(self, model_name: str): + """ + Loads the sentence embedding model specified by the model name. + + Args: + model_name (str): The name of the model to load for generating embeddings. + """ try: self.model = SentenceTransformer(model_name) except Exception as e: @@ -34,6 +55,15 @@ class Qdrant: print(f"Collection '{self.collection_name}' created.") def add_vectors(self, docs: List[dict]): + """ + Adds vector representations of documents to the Qdrant collection. + + Args: + docs (List[dict]): A list of documents where each document is a dictionary with at least a 'page_content' key. + + Returns: + OperationResponse or None: Returns the operation information if successful, otherwise None. + """ points = [] for i, doc in enumerate(docs): try: @@ -57,6 +87,16 @@ class Qdrant: return None def search_vectors(self, query: str, limit: int = 3): + """ + Searches the collection for vectors similar to the query vector. + + Args: + query (str): The query string to be converted into a vector and used for searching. + limit (int): The number of search results to return. Defaults to 3. + + Returns: + SearchResult or None: Returns the search results if successful, otherwise None. + """ try: query_vector = self.model.encode(query, normalize_embeddings=True) search_result = self.client.search(