diff --git a/docs/swarms/memory/pinecone.md b/docs/swarms/memory/pinecone.md index cf73ea65..11f9a018 100644 --- a/docs/swarms/memory/pinecone.md +++ b/docs/swarms/memory/pinecone.md @@ -1,4 +1,4 @@ -# `PineconeVectorStoreStore` Documentation +# `PineconDB` Documentation ## Table of Contents diff --git a/mkdocs.yml b/mkdocs.yml index ab173cba..2d200d91 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -105,7 +105,7 @@ nav: - SequentialWorkflow: 'swarms/structs/sequential_workflow.md' - swarms.memory: - Weaviate: "swarms/memory/weaviate.md" - - PineconeVectorStoreStore: "swarms/memory/pinecone.md" + - PineconDB: "swarms/memory/pinecone.md" - PGVectorStore: "swarms/memory/pg.md" - swarms.utils: - phoenix_trace_decorator: "swarms/utils/phoenix_tracer.md" diff --git a/swarms/memory/pinecone.py b/swarms/memory/pinecone.py index 308273d9..f48bb627 100644 --- a/swarms/memory/pinecone.py +++ b/swarms/memory/pinecone.py @@ -1,14 +1,14 @@ from typing import Optional -from swarms.memory.base import BaseVectorStore +from swarms.memory.base_vectordb import VectorDatabase import pinecone from attr import define, field from swarms.utils.hash import str_to_hash @define -class PineconeVectorStoreStore(BaseVectorStore): +class PineconDB(VectorDatabase): """ - PineconeVectorStore is a vector storage driver that uses Pinecone as the underlying storage engine. + PineconDB is a vector storage driver that uses Pinecone as the underlying storage engine. Pinecone is a vector database that allows you to store, search, and retrieve high-dimensional vectors with blazing speed and low latency. It is a managed service that is easy to use and scales effortlessly, so you can @@ -34,14 +34,14 @@ class PineconeVectorStoreStore(BaseVectorStore): Creates a new index. Usage: - >>> from swarms.memory.vector_stores.pinecone import PineconeVectorStore + >>> from swarms.memory.vector_stores.pinecone import PineconDB >>> from swarms.utils.embeddings import USEEmbedding >>> from swarms.utils.hash import str_to_hash >>> from swarms.utils.dataframe import dataframe_to_hash >>> import pandas as pd >>> - >>> # Create a new PineconeVectorStore instance: - >>> pv = PineconeVectorStore( + >>> # Create a new PineconDB instance: + >>> pv = PineconDB( >>> api_key="your-api-key", >>> index_name="your-index-name", >>> environment="us-west1-gcp", @@ -102,7 +102,7 @@ class PineconeVectorStoreStore(BaseVectorStore): self.index = pinecone.Index(self.index_name) - def upsert_vector( + def add( self, vector: list[float], vector_id: Optional[str] = None, @@ -110,7 +110,17 @@ class PineconeVectorStoreStore(BaseVectorStore): meta: Optional[dict] = None, **kwargs, ) -> str: - """Upsert vector""" + """Add a vector to the index. + + Args: + vector (list[float]): _description_ + vector_id (Optional[str], optional): _description_. Defaults to None. + namespace (Optional[str], optional): _description_. Defaults to None. + meta (Optional[dict], optional): _description_. Defaults to None. + + Returns: + str: _description_ + """ vector_id = ( vector_id if vector_id else str_to_hash(str(vector)) ) @@ -121,31 +131,15 @@ class PineconeVectorStoreStore(BaseVectorStore): return vector_id - def load_entry( - self, vector_id: str, namespace: Optional[str] = None - ) -> Optional[BaseVectorStore.Entry]: - """Load entry""" - result = self.index.fetch( - ids=[vector_id], namespace=namespace - ).to_dict() - vectors = list(result["vectors"].values()) - - if len(vectors) > 0: - vector = vectors[0] - - return BaseVectorStore.Entry( - id=vector["id"], - meta=vector["metadata"], - vector=vector["values"], - namespace=result["namespace"], - ) - else: - return None - - def load_entries( - self, namespace: Optional[str] = None - ) -> list[BaseVectorStore.Entry]: - """Load entries""" + def load_entries(self, namespace: Optional[str] = None): + """Load all entries from the index. + + Args: + namespace (Optional[str], optional): _description_. Defaults to None. + + Returns: + _type_: _description_ + """ # This is a hacky way to query up to 10,000 values from Pinecone. Waiting on an official API for fetching # all values from a namespace: # https://community.pinecone.io/t/is-there-a-way-to-query-all-the-vectors-and-or-metadata-from-a-namespace/797/5 @@ -157,15 +151,14 @@ class PineconeVectorStoreStore(BaseVectorStore): namespace=namespace, ) - return [ - BaseVectorStore.Entry( - id=r["id"], - vector=r["values"], - meta=r["metadata"], - namespace=results["namespace"], - ) - for r in results["matches"] - ] + for result in results["matches"]: + entry = { + "id": result["id"], + "vector": result["values"], + "meta": result["metadata"], + "namespace": result["namespace"], + } + return entry def query( self, @@ -173,19 +166,26 @@ class PineconeVectorStoreStore(BaseVectorStore): count: Optional[int] = None, namespace: Optional[str] = None, include_vectors: bool = False, - # PineconeVectorStoreStorageDriver-specific params: + # PineconDBStorageDriver-specific params: include_metadata=True, **kwargs, - ) -> list[BaseVectorStore.QueryResult]: - """Query vectors""" + ): + """Query the index for vectors similar to the given query string. + + Args: + query (str): _description_ + count (Optional[int], optional): _description_. Defaults to None. + namespace (Optional[str], optional): _description_. Defaults to None. + include_vectors (bool, optional): _description_. Defaults to False. + include_metadata (bool, optional): _description_. Defaults to True. + + Returns: + _type_: _description_ + """ vector = self.embedding_driver.embed_string(query) params = { - "top_k": ( - count - if count - else BaseVectorStore.DEFAULT_QUERY_COUNT - ), + "top_k": count, "namespace": namespace, "include_values": include_vectors, "include_metadata": include_metadata, @@ -193,19 +193,22 @@ class PineconeVectorStoreStore(BaseVectorStore): results = self.index.query(vector, **params) - return [ - BaseVectorStore.QueryResult( - id=r["id"], - vector=r["values"], - score=r["score"], - meta=r["metadata"], - namespace=results["namespace"], - ) - for r in results["matches"] - ] + for r in results["matches"]: + entry = { + "id": results["id"], + "vector": results["values"], + "score": results["scores"], + "meta": results["metadata"], + "namespace": results["namespace"], + } + return entry def create_index(self, name: str, **kwargs) -> None: - """Create index""" + """Create a new index. + + Args: + name (str): _description_ + """ params = { "name": name, "dimension": self.embedding_driver.dimensions, diff --git a/tests/memory/test_pinecone.py b/tests/memory/test_pinecone.py index 7c71503e..f43cd6ea 100644 --- a/tests/memory/test_pinecone.py +++ b/tests/memory/test_pinecone.py @@ -1,6 +1,6 @@ import os from unittest.mock import patch -from swarms.memory.pinecone import PineconeVectorStore +from swarms.memory.pinecone import PineconDB api_key = os.getenv("PINECONE_API_KEY") or "" @@ -9,7 +9,7 @@ def test_init(): with patch("pinecone.init") as MockInit, patch( "pinecone.Index" ) as MockIndex: - store = PineconeVectorStore( + store = PineconDB( api_key=api_key, index_name="test_index", environment="test_env", @@ -21,7 +21,7 @@ def test_init(): def test_upsert_vector(): with patch("pinecone.init"), patch("pinecone.Index") as MockIndex: - store = PineconeVectorStore( + store = PineconDB( api_key=api_key, index_name="test_index", environment="test_env", @@ -37,7 +37,7 @@ def test_upsert_vector(): def test_load_entry(): with patch("pinecone.init"), patch("pinecone.Index") as MockIndex: - store = PineconeVectorStore( + store = PineconDB( api_key=api_key, index_name="test_index", environment="test_env", @@ -48,7 +48,7 @@ def test_load_entry(): def test_load_entries(): with patch("pinecone.init"), patch("pinecone.Index") as MockIndex: - store = PineconeVectorStore( + store = PineconDB( api_key=api_key, index_name="test_index", environment="test_env", @@ -59,7 +59,7 @@ def test_load_entries(): def test_query(): with patch("pinecone.init"), patch("pinecone.Index") as MockIndex: - store = PineconeVectorStore( + store = PineconDB( api_key=api_key, index_name="test_index", environment="test_env", @@ -72,7 +72,7 @@ def test_create_index(): with patch("pinecone.init"), patch("pinecone.Index"), patch( "pinecone.create_index" ) as MockCreateIndex: - store = PineconeVectorStore( + store = PineconDB( api_key=api_key, index_name="test_index", environment="test_env",