[PineconDB][REFACTOR]

pull/299/head
Kye 1 year ago
parent b8c859109c
commit 31d0a17352

@ -1,4 +1,4 @@
# `PineconeVectorStoreStore` Documentation # `PineconDB` Documentation
## Table of Contents ## Table of Contents

@ -105,7 +105,7 @@ nav:
- SequentialWorkflow: 'swarms/structs/sequential_workflow.md' - SequentialWorkflow: 'swarms/structs/sequential_workflow.md'
- swarms.memory: - swarms.memory:
- Weaviate: "swarms/memory/weaviate.md" - Weaviate: "swarms/memory/weaviate.md"
- PineconeVectorStoreStore: "swarms/memory/pinecone.md" - PineconDB: "swarms/memory/pinecone.md"
- PGVectorStore: "swarms/memory/pg.md" - PGVectorStore: "swarms/memory/pg.md"
- swarms.utils: - swarms.utils:
- phoenix_trace_decorator: "swarms/utils/phoenix_tracer.md" - phoenix_trace_decorator: "swarms/utils/phoenix_tracer.md"

@ -1,14 +1,14 @@
from typing import Optional from typing import Optional
from swarms.memory.base import BaseVectorStore from swarms.memory.base_vectordb import VectorDatabase
import pinecone import pinecone
from attr import define, field from attr import define, field
from swarms.utils.hash import str_to_hash from swarms.utils.hash import str_to_hash
@define @define
class PineconeVectorStoreStore(BaseVectorStore): class PineconDB(VectorDatabase):
""" """
PineconeVectorStore is a vector storage driver that uses Pinecone as the underlying storage engine. PineconDB is a vector storage driver that uses Pinecone as the underlying storage engine.
Pinecone is a vector database that allows you to store, search, and retrieve high-dimensional vectors with Pinecone is a vector database that allows you to store, search, and retrieve high-dimensional vectors with
blazing speed and low latency. It is a managed service that is easy to use and scales effortlessly, so you can blazing speed and low latency. It is a managed service that is easy to use and scales effortlessly, so you can
@ -34,14 +34,14 @@ class PineconeVectorStoreStore(BaseVectorStore):
Creates a new index. Creates a new index.
Usage: Usage:
>>> from swarms.memory.vector_stores.pinecone import PineconeVectorStore >>> from swarms.memory.vector_stores.pinecone import PineconDB
>>> from swarms.utils.embeddings import USEEmbedding >>> from swarms.utils.embeddings import USEEmbedding
>>> from swarms.utils.hash import str_to_hash >>> from swarms.utils.hash import str_to_hash
>>> from swarms.utils.dataframe import dataframe_to_hash >>> from swarms.utils.dataframe import dataframe_to_hash
>>> import pandas as pd >>> import pandas as pd
>>> >>>
>>> # Create a new PineconeVectorStore instance: >>> # Create a new PineconDB instance:
>>> pv = PineconeVectorStore( >>> pv = PineconDB(
>>> api_key="your-api-key", >>> api_key="your-api-key",
>>> index_name="your-index-name", >>> index_name="your-index-name",
>>> environment="us-west1-gcp", >>> environment="us-west1-gcp",
@ -102,7 +102,7 @@ class PineconeVectorStoreStore(BaseVectorStore):
self.index = pinecone.Index(self.index_name) self.index = pinecone.Index(self.index_name)
def upsert_vector( def add(
self, self,
vector: list[float], vector: list[float],
vector_id: Optional[str] = None, vector_id: Optional[str] = None,
@ -110,7 +110,17 @@ class PineconeVectorStoreStore(BaseVectorStore):
meta: Optional[dict] = None, meta: Optional[dict] = None,
**kwargs, **kwargs,
) -> str: ) -> str:
"""Upsert vector""" """Add a vector to the index.
Args:
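vector (list[float]): The vector to add.
vector_id (Optional[str], optional): ID to store the vector under. Defaults to None, in which case a hash of the vector's string form is used.
namespace (Optional[str], optional): Namespace for the vector. Defaults to None.
meta (Optional[dict], optional): Metadata associated with the vector. Defaults to None.
Returns:
str: The ID of the vector (the given ID, or the generated hash).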
"""
vector_id = ( vector_id = (
vector_id if vector_id else str_to_hash(str(vector)) vector_id if vector_id else str_to_hash(str(vector))
) )
@ -121,31 +131,15 @@ class PineconeVectorStoreStore(BaseVectorStore):
return vector_id return vector_id
def load_entry( def load_entries(self, namespace: Optional[str] = None):
self, vector_id: str, namespace: Optional[str] = None """Load all entries from the index.
) -> Optional[BaseVectorStore.Entry]:
"""Load entry""" Args:
result = self.index.fetch( namespace (Optional[str], optional): _description_. Defaults to None.
ids=[vector_id], namespace=namespace
).to_dict() Returns:
vectors = list(result["vectors"].values()) _type_: _description_
"""
if len(vectors) > 0:
vector = vectors[0]
return BaseVectorStore.Entry(
id=vector["id"],
meta=vector["metadata"],
vector=vector["values"],
namespace=result["namespace"],
)
else:
return None
def load_entries(
self, namespace: Optional[str] = None
) -> list[BaseVectorStore.Entry]:
"""Load entries"""
# This is a hacky way to query up to 10,000 values from Pinecone. Waiting on an official API for fetching # This is a hacky way to query up to 10,000 values from Pinecone. Waiting on an official API for fetching
# all values from a namespace: # all values from a namespace:
# https://community.pinecone.io/t/is-there-a-way-to-query-all-the-vectors-and-or-metadata-from-a-namespace/797/5 # https://community.pinecone.io/t/is-there-a-way-to-query-all-the-vectors-and-or-metadata-from-a-namespace/797/5
@ -157,15 +151,14 @@ class PineconeVectorStoreStore(BaseVectorStore):
namespace=namespace, namespace=namespace,
) )
return [ for result in results["matches"]:
BaseVectorStore.Entry( entry = {
id=r["id"], "id": result["id"],
vector=r["values"], "vector": result["values"],
meta=r["metadata"], "meta": result["metadata"],
namespace=results["namespace"], "namespace": result["namespace"],
) }
for r in results["matches"] return entry
]
def query( def query(
self, self,
@ -173,19 +166,26 @@ class PineconeVectorStoreStore(BaseVectorStore):
count: Optional[int] = None, count: Optional[int] = None,
namespace: Optional[str] = None, namespace: Optional[str] = None,
include_vectors: bool = False, include_vectors: bool = False,
# PineconeVectorStoreStorageDriver-specific params: # PineconDBStorageDriver-specific params:
include_metadata=True, include_metadata=True,
**kwargs, **kwargs,
) -> list[BaseVectorStore.QueryResult]: ):
"""Query vectors""" """Query the index for vectors similar to the given query string.
Args:
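query (str): Query text; it is embedded with the embedding driver before the index is searched.
count (Optional[int], optional): Number of matches to request (passed to Pinecone as `top_k`). Defaults to None.
namespace (Optional[str], optional): Namespace to search in. Defaults to None.
include_vectors (bool, optional): Whether to request vector values (passed as `include_values`). Defaults to False.
include_metadata (bool, optional): Whether to request metadata with each match. Defaults to True.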
Returns:
_type_: _description_
"""
vector = self.embedding_driver.embed_string(query) vector = self.embedding_driver.embed_string(query)
params = { params = {
"top_k": ( "top_k": count,
count
if count
else BaseVectorStore.DEFAULT_QUERY_COUNT
),
"namespace": namespace, "namespace": namespace,
"include_values": include_vectors, "include_values": include_vectors,
"include_metadata": include_metadata, "include_metadata": include_metadata,
@ -193,19 +193,22 @@ class PineconeVectorStoreStore(BaseVectorStore):
results = self.index.query(vector, **params) results = self.index.query(vector, **params)
return [ for r in results["matches"]:
BaseVectorStore.QueryResult( entry = {
id=r["id"], "id": results["id"],
vector=r["values"], "vector": results["values"],
score=r["score"], "score": results["scores"],
meta=r["metadata"], "meta": results["metadata"],
namespace=results["namespace"], "namespace": results["namespace"],
) }
for r in results["matches"] return entry
]
def create_index(self, name: str, **kwargs) -> None: def create_index(self, name: str, **kwargs) -> None:
"""Create index""" """Create a new index.
Args:
name (str): Name of the index to create.
"""
params = { params = {
"name": name, "name": name,
"dimension": self.embedding_driver.dimensions, "dimension": self.embedding_driver.dimensions,

@ -1,6 +1,6 @@
import os import os
from unittest.mock import patch from unittest.mock import patch
from swarms.memory.pinecone import PineconeVectorStore from swarms.memory.pinecone import PineconDB
api_key = os.getenv("PINECONE_API_KEY") or "" api_key = os.getenv("PINECONE_API_KEY") or ""
@ -9,7 +9,7 @@ def test_init():
with patch("pinecone.init") as MockInit, patch( with patch("pinecone.init") as MockInit, patch(
"pinecone.Index" "pinecone.Index"
) as MockIndex: ) as MockIndex:
store = PineconeVectorStore( store = PineconDB(
api_key=api_key, api_key=api_key,
index_name="test_index", index_name="test_index",
environment="test_env", environment="test_env",
@ -21,7 +21,7 @@ def test_init():
def test_upsert_vector(): def test_upsert_vector():
with patch("pinecone.init"), patch("pinecone.Index") as MockIndex: with patch("pinecone.init"), patch("pinecone.Index") as MockIndex:
store = PineconeVectorStore( store = PineconDB(
api_key=api_key, api_key=api_key,
index_name="test_index", index_name="test_index",
environment="test_env", environment="test_env",
@ -37,7 +37,7 @@ def test_upsert_vector():
def test_load_entry(): def test_load_entry():
with patch("pinecone.init"), patch("pinecone.Index") as MockIndex: with patch("pinecone.init"), patch("pinecone.Index") as MockIndex:
store = PineconeVectorStore( store = PineconDB(
api_key=api_key, api_key=api_key,
index_name="test_index", index_name="test_index",
environment="test_env", environment="test_env",
@ -48,7 +48,7 @@ def test_load_entry():
def test_load_entries(): def test_load_entries():
with patch("pinecone.init"), patch("pinecone.Index") as MockIndex: with patch("pinecone.init"), patch("pinecone.Index") as MockIndex:
store = PineconeVectorStore( store = PineconDB(
api_key=api_key, api_key=api_key,
index_name="test_index", index_name="test_index",
environment="test_env", environment="test_env",
@ -59,7 +59,7 @@ def test_load_entries():
def test_query(): def test_query():
with patch("pinecone.init"), patch("pinecone.Index") as MockIndex: with patch("pinecone.init"), patch("pinecone.Index") as MockIndex:
store = PineconeVectorStore( store = PineconDB(
api_key=api_key, api_key=api_key,
index_name="test_index", index_name="test_index",
environment="test_env", environment="test_env",
@ -72,7 +72,7 @@ def test_create_index():
with patch("pinecone.init"), patch("pinecone.Index"), patch( with patch("pinecone.init"), patch("pinecone.Index"), patch(
"pinecone.create_index" "pinecone.create_index"
) as MockCreateIndex: ) as MockCreateIndex:
store = PineconeVectorStore( store = PineconDB(
api_key=api_key, api_key=api_key,
index_name="test_index", index_name="test_index",
environment="test_env", environment="test_env",

Loading…
Cancel
Save