[PineconDB][REFACTOR]

2 years ago · 31d0a17352
parent b8c859109c
commit 31d0a17352
4 changed files with 73 additions and 70 deletions
--- a/docs/swarms/memory/pinecone.md
+++ b/docs/swarms/memory/pinecone.md
@ -1,4 +1,4 @@
-# `PineconeVectorStoreStore` Documentation
+# `PineconDB` Documentation

 ## Table of Contents

--- a/mkdocs.yml
+++ b/mkdocs.yml
@ -105,7 +105,7 @@ nav:
    - SequentialWorkflow: 'swarms/structs/sequential_workflow.md'
  - swarms.memory:
    - Weaviate: "swarms/memory/weaviate.md"
-    - PineconeVectorStoreStore: "swarms/memory/pinecone.md"
+    - PineconDB: "swarms/memory/pinecone.md"
    - PGVectorStore: "swarms/memory/pg.md"
  - swarms.utils:
    - phoenix_trace_decorator: "swarms/utils/phoenix_tracer.md"
--- a/swarms/memory/pinecone.py
+++ b/swarms/memory/pinecone.py
@ -1,14 +1,14 @@
 from typing import Optional
-from swarms.memory.base import BaseVectorStore
+from swarms.memory.base_vectordb import VectorDatabase
 import pinecone
 from attr import define, field
 from swarms.utils.hash import str_to_hash


@define
-class PineconeVectorStoreStore(BaseVectorStore):
+class PineconDB(VectorDatabase):
    """
-    PineconeVectorStore is a vector storage driver that uses Pinecone as the underlying storage engine.
+    PineconDB is a vector storage driver that uses Pinecone as the underlying storage engine.

    Pinecone is a vector database that allows you to store, search, and retrieve high-dimensional vectors with
    blazing speed and low latency. It is a managed service that is easy to use and scales effortlessly, so you can
@ -34,14 +34,14 @@ class PineconeVectorStoreStore(BaseVectorStore):
            Creates a new index.

    Usage:
-    >>> from swarms.memory.vector_stores.pinecone import PineconeVectorStore
+    >>> from swarms.memory.pinecone import PineconDB
    >>> from swarms.utils.embeddings import USEEmbedding
    >>> from swarms.utils.hash import str_to_hash
    >>> from swarms.utils.dataframe import dataframe_to_hash
    >>> import pandas as pd
    >>>
-    >>> # Create a new PineconeVectorStore instance:
-    >>> pv = PineconeVectorStore(
+    >>> # Create a new PineconDB instance:
+    >>> pv = PineconDB(
    >>>     api_key="your-api-key",
    >>>     index_name="your-index-name",
    >>>     environment="us-west1-gcp",
@ -102,7 +102,7 @@ class PineconeVectorStoreStore(BaseVectorStore):

        self.index = pinecone.Index(self.index_name)

-    def upsert_vector(
+    def add(
        self,
        vector: list[float],
        vector_id: Optional[str] = None,
@ -110,7 +110,17 @@ class PineconeVectorStoreStore(BaseVectorStore):
        meta: Optional[dict] = None,
        **kwargs,
    ) -> str:
-        """Upsert vector"""
+        """Add a vector to the index.
+
+        Args:
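+            vector (list[float]): The embedding values to store.
+            vector_id (Optional[str], optional): ID to store the vector under. If omitted or empty, str_to_hash(str(vector)) is used. Defaults to None.
+            namespace (Optional[str], optional): Namespace for the write (presumably the Pinecone namespace; the upsert call is outside this hunk). Defaults to None.
+            meta (Optional[dict], optional): Metadata to attach to the vector (presumably; the upsert call is outside this hunk). Defaults to None.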
+
+        Returns:
+            str: The ID used for the vector: the given vector_id, or the hash generated from the vector.
+        """
        vector_id = (
            vector_id if vector_id else str_to_hash(str(vector))
        )
@ -121,31 +131,15 @@ class PineconeVectorStoreStore(BaseVectorStore):

        return vector_id

-    def load_entry(
-        self, vector_id: str, namespace: Optional[str] = None
-    ) -> Optional[BaseVectorStore.Entry]:
-        """Load entry"""
-        result = self.index.fetch(
-            ids=[vector_id], namespace=namespace
-        ).to_dict()
-        vectors = list(result["vectors"].values())
-
-        if len(vectors) > 0:
-            vector = vectors[0]
-
-            return BaseVectorStore.Entry(
-                id=vector["id"],
-                meta=vector["metadata"],
-                vector=vector["values"],
-                namespace=result["namespace"],
-            )
-        else:
-            return None
-
-    def load_entries(
-        self, namespace: Optional[str] = None
-    ) -> list[BaseVectorStore.Entry]:
-        """Load entries"""
+    def load_entries(self, namespace: Optional[str] = None):
+        """Load all entries from the index.
+
+        Args:
+            namespace (Optional[str], optional): Namespace forwarded to the underlying index query. Defaults to None.
+
+        Returns:
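+            Optional[dict]: A single dict with "id", "vector", "meta" and "namespace" keys, returned on the first loop iteration (the return is inside the loop); None if there are no matches.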
+        """
        # This is a hacky way to query up to 10,000 values from Pinecone. Waiting on an official API for fetching
        # all values from a namespace:
        # https://community.pinecone.io/t/is-there-a-way-to-query-all-the-vectors-and-or-metadata-from-a-namespace/797/5
@ -157,15 +151,14 @@ class PineconeVectorStoreStore(BaseVectorStore):
            namespace=namespace,
        )

-        return [
-            BaseVectorStore.Entry(
-                id=r["id"],
-                vector=r["values"],
-                meta=r["metadata"],
-                namespace=results["namespace"],
-            )
-            for r in results["matches"]
-        ]
+        for result in results["matches"]:
+            entry = {
+                "id": result["id"],
+                "vector": result["values"],
+                "meta": result["metadata"],
+                "namespace": result["namespace"],
+            }
+            return entry

    def query(
        self,
@ -173,19 +166,26 @@ class PineconeVectorStoreStore(BaseVectorStore):
        count: Optional[int] = None,
        namespace: Optional[str] = None,
        include_vectors: bool = False,
-        # PineconeVectorStoreStorageDriver-specific params:
+        # Pinecone-specific params:
        include_metadata=True,
        **kwargs,
-    ) -> list[BaseVectorStore.QueryResult]:
-        """Query vectors"""
+    ):
+        """Query the index for vectors similar to the given query string.
+
+        Args:
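+            query (str): Text to search for; it is embedded with self.embedding_driver.embed_string before querying.
+            count (Optional[int], optional): Passed to the index query as "top_k" without a fallback value. Defaults to None.
+            namespace (Optional[str], optional): Passed to the index query as "namespace". Defaults to None.
+            include_vectors (bool, optional): Passed to the index query as "include_values". Defaults to False.
+            include_metadata (bool, optional): Passed to the index query as "include_metadata". Defaults to True.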
+
+        Returns:
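+            Optional[dict]: A single dict with "id", "vector", "score", "meta" and "namespace" keys, returned on the first loop iteration; None if there are no matches.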
+        """
        vector = self.embedding_driver.embed_string(query)

        params = {
-            "top_k": (
-                count
-                if count
-                else BaseVectorStore.DEFAULT_QUERY_COUNT
-            ),
+            "top_k": count,
            "namespace": namespace,
            "include_values": include_vectors,
            "include_metadata": include_metadata,
@ -193,19 +193,22 @@ class PineconeVectorStoreStore(BaseVectorStore):

        results = self.index.query(vector, **params)

-        return [
-            BaseVectorStore.QueryResult(
-                id=r["id"],
-                vector=r["values"],
-                score=r["score"],
-                meta=r["metadata"],
-                namespace=results["namespace"],
-            )
-            for r in results["matches"]
-        ]
+        for r in results["matches"]:
+            entry = {
+                "id": results["id"],
+                "vector": results["values"],
+                "score": results["scores"],
+                "meta": results["metadata"],
+                "namespace": results["namespace"],
+            }
+            return entry

    def create_index(self, name: str, **kwargs) -> None:
-        """Create index"""
+        """Create a new index.
+
+        Args:
+            name (str): Name of the index to create; sent as the "name" parameter.
+        """
        params = {
            "name": name,
            "dimension": self.embedding_driver.dimensions,
--- a/tests/memory/test_pinecone.py
+++ b/tests/memory/test_pinecone.py
@ -1,6 +1,6 @@
 import os
 from unittest.mock import patch
-from swarms.memory.pinecone import PineconeVectorStore
+from swarms.memory.pinecone import PineconDB

 api_key = os.getenv("PINECONE_API_KEY") or ""

@ -9,7 +9,7 @@ def test_init():
    with patch("pinecone.init") as MockInit, patch(
        "pinecone.Index"
    ) as MockIndex:
-        store = PineconeVectorStore(
+        store = PineconDB(
            api_key=api_key,
            index_name="test_index",
            environment="test_env",
@ -21,7 +21,7 @@ def test_init():

 def test_upsert_vector():
    with patch("pinecone.init"), patch("pinecone.Index") as MockIndex:
-        store = PineconeVectorStore(
+        store = PineconDB(
            api_key=api_key,
            index_name="test_index",
            environment="test_env",
@ -37,7 +37,7 @@ def test_upsert_vector():

 def test_load_entry():
    with patch("pinecone.init"), patch("pinecone.Index") as MockIndex:
-        store = PineconeVectorStore(
+        store = PineconDB(
            api_key=api_key,
            index_name="test_index",
            environment="test_env",
@ -48,7 +48,7 @@ def test_load_entry():

 def test_load_entries():
    with patch("pinecone.init"), patch("pinecone.Index") as MockIndex:
-        store = PineconeVectorStore(
+        store = PineconDB(
            api_key=api_key,
            index_name="test_index",
            environment="test_env",
@ -59,7 +59,7 @@ def test_load_entries():

 def test_query():
    with patch("pinecone.init"), patch("pinecone.Index") as MockIndex:
-        store = PineconeVectorStore(
+        store = PineconDB(
            api_key=api_key,
            index_name="test_index",
            environment="test_env",
@ -72,7 +72,7 @@ def test_create_index():
    with patch("pinecone.init"), patch("pinecone.Index"), patch(
        "pinecone.create_index"
    ) as MockCreateIndex:
-        store = PineconeVectorStore(
+        store = PineconDB(
            api_key=api_key,
            index_name="test_index",
            environment="test_env",