pull/64/head
Kye 1 year ago
parent a5dcc0f175
commit 7b69b954df

@ -0,0 +1,93 @@
Create multi-page, long, and explicit professional PyTorch-like documentation for the swarms code below. Follow the outline for the swarms library, provide many examples, and teach the user about the code. Provide examples for every function, make the documentation 10,000 words, and provide many usage examples. Note that this is Markdown documentation; create the documentation for the code to document.
Now make the professional documentation for this code. Provide the architecture, how the class works and why it works that way, and its purpose. Provide the args, their types, and 3 different usage examples; in the examples use `from shapeless import x`.
BE VERY EXPLICIT AND THOROUGH, MAKE IT DEEP AND USEFUL
########
Step 1: Understand the purpose and functionality of the module or framework
Read and analyze the description provided in the documentation to understand the purpose and functionality of the module or framework.
Identify the key features, parameters, and operations performed by the module or framework.
Step 2: Provide an overview and introduction
Start the documentation by providing a brief overview and introduction to the module or framework.
Explain the importance and relevance of the module or framework in the context of the problem it solves.
Highlight any key concepts or terminology that will be used throughout the documentation.
Step 3: Provide a class or function definition
Provide the class or function definition for the module or framework.
Include the parameters that need to be passed to the class or function and provide a brief description of each parameter.
Specify the data types and default values for each parameter.
Step 4: Explain the functionality and usage
Provide a detailed explanation of how the module or framework works and what it does.
Describe the steps involved in using the module or framework, including any specific requirements or considerations.
Provide code examples to demonstrate the usage of the module or framework.
Explain the expected inputs and outputs for each operation or function.
Step 5: Provide additional information and tips
Provide any additional information or tips that may be useful for using the module or framework effectively.
Address any common issues or challenges that developers may encounter and provide recommendations or workarounds.
Step 6: Include references and resources
Include references to any external resources or research papers that provide further information or background on the module or framework.
Provide links to relevant documentation or websites for further exploration.
Example Template for the given documentation:
# Module/Function Name: MultiheadAttention
class torch.nn.MultiheadAttention(embed_dim, num_heads, dropout=0.0, bias=True, add_bias_kv=False, add_zero_attn=False, kdim=None, vdim=None, batch_first=False, device=None, dtype=None):
"""
Creates a multi-head attention module for joint information representation from the different subspaces.
Parameters:
- embed_dim (int): Total dimension of the model.
- num_heads (int): Number of parallel attention heads. The embed_dim will be split across num_heads.
- dropout (float): Dropout probability on attn_output_weights. Default: 0.0 (no dropout).
- bias (bool): If specified, adds bias to input/output projection layers. Default: True.
- add_bias_kv (bool): If specified, adds bias to the key and value sequences at dim=0. Default: False.
- add_zero_attn (bool): If specified, adds a new batch of zeros to the key and value sequences at dim=1. Default: False.
- kdim (int): Total number of features for keys. Default: None (uses kdim=embed_dim).
- vdim (int): Total number of features for values. Default: None (uses vdim=embed_dim).
- batch_first (bool): If True, the input and output tensors are provided as (batch, seq, feature). Default: False.
- device (torch.device): If specified, the tensors will be moved to the specified device.
- dtype (torch.dtype): If specified, the tensors will have the specified dtype.
"""
def forward(query, key, value, key_padding_mask=None, need_weights=True, attn_mask=None, average_attn_weights=True, is_causal=False):
"""
Forward pass of the multi-head attention module.
Parameters:
- query (Tensor): Query embeddings of shape (L, E_q) for unbatched input, (L, N, E_q) when batch_first=False, or (N, L, E_q) when batch_first=True.
- key (Tensor): Key embeddings of shape (S, E_k) for unbatched input, (S, N, E_k) when batch_first=False, or (N, S, E_k) when batch_first=True.
- value (Tensor): Value embeddings of shape (S, E_v) for unbatched input, (S, N, E_v) when batch_first=False, or (N, S, E_v) when batch_first=True.
- key_padding_mask (Optional[Tensor]): If specified, a mask indicating elements to be ignored in key for attention computation.
- need_weights (bool): If specified, returns attention weights in addition to attention outputs. Default: True.
- attn_mask (Optional[Tensor]): If specified, a mask preventing attention to certain positions.
- average_attn_weights (bool): If true, returns averaged attention weights per head. Otherwise, returns attention weights separately per head. Note that this flag only has an effect when need_weights=True. Default: True.
- is_causal (bool): If specified, applies a causal mask as the attention mask. Default: False.
Returns:
Tuple[Tensor, Optional[Tensor]]:
- attn_output (Tensor): Attention outputs of shape (L, E) for unbatched input, (L, N, E) when batch_first=False, or (N, L, E) when batch_first=True.
- attn_output_weights (Optional[Tensor]): Attention weights of shape (L, S) when unbatched or (N, L, S) when batched. Optional, only returned when need_weights=True.
"""
# Implementation of the forward pass of the attention module goes here
return attn_output, attn_output_weights
# Usage example:
multihead_attn = nn.MultiheadAttention(embed_dim, num_heads)
attn_output, attn_output_weights = multihead_attn(query, key, value)
Note:
The above template includes the class or function definition, parameters, description, and usage example.
To replicate the documentation for any other module or framework, follow the same structure and provide the specific details for that module or framework.
############# CODE TO DOCUMENT, DOCUMENT THE

@ -0,0 +1,343 @@
# `PgVectorVectorStore` Documentation
## Table of Contents
1. [Introduction](#introduction)
2. [Overview](#overview)
3. [Class Definition](#class-definition)
4. [Functionality and Usage](#functionality-and-usage)
- [Setting Up the Database](#setting-up-the-database)
- [Upserting Vectors](#upserting-vectors)
- [Loading Vector Entries](#loading-vector-entries)
- [Querying Vectors](#querying-vectors)
5. [Additional Information](#additional-information)
6. [References and Resources](#references-and-resources)
---
## 1. Introduction <a name="introduction"></a>
Welcome to the documentation for the Swarms `PgVectorVectorStore` class! Swarms is a library that provides various memory and storage options for high-dimensional vectors. In this documentation, we will focus on the `PgVectorVectorStore` class, which is a vector storage driver that uses PostgreSQL with the PGVector extension as the underlying storage engine.
### 1.1 Purpose
The `PgVectorVectorStore` class allows you to interact with a PostgreSQL database and store high-dimensional vectors efficiently. By using Swarms with PostgreSQL and PGVector, you can manage and work with vector data in your applications with ease.
### 1.2 Key Features
- Integration with PostgreSQL and PGVector for vector storage.
- Simple and convenient API for upserting vectors, querying, and loading entries.
- Support for creating and managing vector collections in PostgreSQL.
---
## 2. Overview <a name="overview"></a>
Before diving into the details of the `PgVectorVectorStore` class, let's provide an overview of its purpose and functionality.
The `PgVectorVectorStore` class is designed to:
- Store high-dimensional vectors in a PostgreSQL database with the PGVector extension.
- Offer a seamless and efficient way to upsert vectors into the database.
- Provide methods for loading individual vector entries or all vector entries in a collection.
- Support vector queries, allowing you to find vectors similar to a given query vector.
In the following sections, we will explore the class definition, its parameters, and how to use it effectively.
---
## 3. Class Definition <a name="class-definition"></a>
Let's start by examining the class definition of `PgVectorVectorStore`, including its attributes and parameters.
```python
class PgVectorVectorStore(BaseVectorStore):
"""
A vector store driver to Postgres using the PGVector extension.
Attributes:
connection_string: An optional string describing the target Postgres database instance.
create_engine_params: Additional configuration params passed when creating the database connection.
engine: An optional sqlalchemy Postgres engine to use.
table_name: Optionally specify the name of the table to be used to store vectors.
...
"""
```
Attributes:
- `connection_string` (Optional[str]): An optional string describing the target Postgres database instance.
- `create_engine_params` (dict): Additional configuration parameters passed when creating the database connection.
- `engine` (Optional[Engine]): An optional SQLAlchemy Postgres engine to use.
- `table_name` (str): Optionally specify the name of the table to be used to store vectors.
### 3.1 Attribute Validators
The class includes validators for the `connection_string` and `engine` attributes to ensure their proper usage. These validators help maintain consistency in attribute values.
### 3.2 Initialization
During initialization, the class checks if an engine is provided. If an engine is not provided, it creates a new database connection using the `connection_string` and `create_engine_params`.
---
## 4. Functionality and Usage <a name="functionality-and-usage"></a>
In this section, we will explore the functionality of the `PgVectorVectorStore` class and provide detailed instructions on how to use it effectively.
### 4.1 Setting Up the Database <a name="setting-up-the-database"></a>
Before using the `PgVectorVectorStore` to store and query vectors, you need to set up the database. This includes creating the necessary extensions and database schema. You can do this using the `setup` method.
```python
def setup(
self,
create_schema: bool = True,
install_uuid_extension: bool = True,
install_vector_extension: bool = True,
) -> None:
"""
Provides a mechanism to initialize the database schema and extensions.
Parameters:
- create_schema (bool): If True, creates the necessary database schema for vector storage. Default: True.
- install_uuid_extension (bool): If True, installs the UUID extension in the database. Default: True.
- install_vector_extension (bool): If True, installs the PGVector extension in the database. Default: True.
"""
```
#### Example 1: Setting Up the Database
```python
# Initialize the PgVectorVectorStore instance
vector_store = PgVectorVectorStore(connection_string="your-db-connection-string", table_name="your-table-name")
# Set up the database with default settings
vector_store.setup()
```
#### Example 2: Customized Database Setup
```python
# Initialize the PgVectorVectorStore instance
vector_store = PgVectorVectorStore(connection_string="your-db-connection-string", table_name="your-table-name")
# Set up the database with customized settings
vector_store.setup(create_schema=False, install_uuid_extension=True, install_vector_extension=True)
```
### 4.2 Upserting Vectors <a name="upserting-vectors"></a>
The `upsert_vector` method allows you to insert or update a vector in the collection. You can specify the vector, an optional vector ID, namespace, and metadata.
```python
def upsert_vector(
self,
vector: list[float],
vector_id: Optional[str] = None,
namespace: Optional[str] = None,
meta: Optional[dict] = None,
**kwargs
) -> str:
"""
Inserts or updates a vector in the collection.
Parameters:
- vector (list[float]): The vector to upsert.
- vector_id (Optional[str]): An optional ID for the vector. If not provided, a unique ID will be generated.
- namespace (Optional[str]): An optional namespace for the vector.
- meta (Optional[dict]): An optional metadata dictionary associated with the vector.
- **kwargs: Additional keyword arguments.
Returns:
- str: The ID of the upserted vector.
"""
```
#### Example: Upserting a Vector
```python
# Initialize the PgVectorVectorStore instance
vector_store = PgVectorVectorStore(connection_string="your-db-connection-string", table_name="your-table-name")
# Define a vector and upsert it
vector = [0.1, 0.2, 0.3, 0.4]
vector_id = "unique-vector-id"
namespace = "your-namespace"
meta = {"key1": "value1", "key2": "value2"}
vector_store.upsert_vector(
vector=vector,
vector_id=vector_id,
namespace=namespace,
meta=meta
)
```
### 4.3 Loading Vector Entries <a name="loading-vector-entries"></a>
You can load vector entries from the collection using the `load_entry` and `load_entries` methods.
#### 4.3.1 Loading a Single Entry
The `load_entry` method allows you to load a specific vector entry based on its identifier and optional namespace.
```python
def load_entry(
self, vector_id: str, namespace: Optional[str] = None
) -> BaseVectorStore.Entry:
"""
Retrieves a specific vector entry from the collection based on its identifier and optional namespace.
Parameters:
- vector_id (str): The ID of the vector to retrieve.
- namespace (Optional[str]): An optional namespace for filtering. Default: None.
Returns:
- BaseVectorStore.Entry: The loaded vector entry.
"""
```
#### Example: Loading a Single Entry
```python
# Initialize the PgVectorVectorStore instance
vector_store = PgVectorVectorStore(connection_string="your-db-connection-string", table_name="your-table-name")
# Load a specific vector entry
loaded_entry = vector_store.load_entry(vector_id="unique-vector-id", namespace="your-namespace")
if loaded_entry is not None:
loaded_vector = loaded_entry.vector
loaded_meta = loaded_entry.meta
# Use the loaded vector and metadata as needed
else:
# Vector not found
```
#### 4.3.2 Loading Multiple Entries
The `load_entries` method allows you to load all vector entries from the collection, optionally filtering by namespace.
```python
def load_entries(
self, namespace: Optional[str] = None
) -> list[BaseVectorStore.Entry]:
"""
Retrieves all vector entries from the collection, optionally filtering to only those that match the provided namespace.
Parameters:
- namespace (Optional[str]): An optional namespace for filtering. Default: None.
Returns:
- list[BaseVectorStore.Entry]: A list of loaded vector entries.
"""
```
#### Example: Loading Multiple Entries
```python
# Initialize the PgVectorVectorStore instance
vector_store = PgVectorVectorStore(connection_string="your-db-connection-string", table_name="your-table-name")
# Load all vector entries in the specified namespace
entries = vector_store.load_entries(namespace="your-namespace")
# Process the loaded entries
for entry in entries:
vector_id = entry.id
vector = entry.vector
meta = entry.meta
# Handle the loaded entries as needed
```
### 4.4 Querying Vectors <a name="querying-vectors"></a>
You can perform vector queries to find vectors similar to a given query string using the `query` method. You can specify the query string, the maximum number of results to return, and other options.
```python
def query(
self,
query: str,
count: Optional[int] = BaseVectorStore.DEFAULT_QUERY_COUNT,
namespace: Optional[str] = None,
include_vectors: bool = False,
distance_metric: str = "cosine_distance",
**kwargs
) -> list[BaseVectorStore.QueryResult]:
"""
Performs a search on the collection to find vectors similar to the provided input vector,
optionally filtering to only those that match the provided namespace.
Parameters:
- query (str): The query string to find similar vectors.
- count (Optional[int]): Maximum number of results to return. Default: BaseVectorStore.DEFAULT_QUERY_COUNT.
- namespace (Optional[str]): An optional namespace for filtering. Default: None.
- include_vectors (bool): If True, includes vectors in the query results. Default: False.
- distance_metric (str): The distance metric to use for similarity measurement.
Options: "cosine_distance", "l2_distance", "inner_product". Default: "cosine_distance".
- **kwargs: Additional keyword arguments.
Returns:
- list[BaseVectorStore.QueryResult]: A list of query results, each containing vector ID, vector (if included), score, and metadata.
"""
```
#### Example: Querying Vectors
```python
# Initialize the PgVectorVectorStore instance
vector_store = PgVectorVectorStore(connection_string="your-db-connection-string", table_name="your-table-name")
# Perform a vector query
query_string = "your-query-string"
count = 10 # Maximum number of results to return
namespace = "your-namespace"
include_vectors = False # Set to True to include vectors in results
distance_metric = "cosine_distance"
results = vector_store.query(
query=query_string,
count=count,
namespace=namespace,
include_vectors=include_vectors,
distance_metric=distance_metric
)
# Process the query results
for result in results:
vector_id = result.id
vector = result.vector
score = result.score
meta = result.meta
# Handle the results as needed
```
---
## 5. Additional Information <a name="additional-information"></a>
Here are some additional tips and information for using the `PgVectorVectorStore` class effectively:
- When upserting vectors, you can generate a unique vector ID using a hash of the vector's content to ensure uniqueness.
- Consider using namespaces to organize and categorize vectors within your PostgreSQL database.
- You can choose from different distance metrics (cosine distance, L2 distance, inner product) for vector querying based on your application's requirements.
- Keep your database connection string secure and follow best practices for database access control.
---
## 6. References and Resources <a name="references-and-resources"></a>
Here are some references and resources for further information on Swarms and PostgreSQL with PGVector:
- [Swarms GitHub Repository](https://github.com/swarms): Swarms library on GitHub for updates and contributions.
- [PostgreSQL Official Website](https://www.postgresql.org/): Official PostgreSQL website for documentation and resources.
- [PGVector GitHub Repository](https://github.com/ankane/pgvector): PGVector extension on GitHub for detailed information.
---
This concludes the documentation for the Swarms `PgVectorVectorStore` class. You now have a comprehensive understanding of how to use Swarms with PostgreSQL and PGVector for vector storage. If you have any further questions or need assistance, please refer to the provided references and resources. Happy coding!

@ -0,0 +1,298 @@
# `PineconeVector` Documentation
## Table of Contents
1. [Introduction](#introduction)
2. [PineconeVector Class](#pineconevector-class)
3. [Installation](#installation)
4. [Usage](#usage)
- [Creating a PineconeVector Instance](#creating-a-pineconevector-instance)
- [Creating an Index](#creating-an-index)
- [Upserting Vectors](#upserting-vectors)
- [Querying the Index](#querying-the-index)
- [Loading an Entry](#loading-an-entry)
- [Loading Entries](#loading-entries)
5. [Additional Information](#additional-information)
6. [References and Resources](#references-and-resources)
---
## 1. Introduction <a name="introduction"></a>
Welcome to the Swarms documentation! Swarms is a library that provides various memory and storage options for high-dimensional vectors. In this documentation, we will focus on the `PineconeVector` class, which is a vector storage driver that uses Pinecone as the underlying storage engine.
### 1.1 Purpose
The `PineconeVector` class allows you to interact with Pinecone, a vector database that enables the storage, search, and retrieval of high-dimensional vectors with speed and low latency. By using Swarms with Pinecone, you can easily manage and work with vector data in your applications without the need to manage infrastructure.
### 1.2 Key Features
- Seamless integration with Pinecone for vector storage.
- Simple and convenient API for upserting vectors, querying, and loading entries.
- Support for creating and managing indexes.
---
## 2. PineconeVector Class <a name="pineconevector-class"></a>
The `PineconeVector` class is the core component of Swarms that interacts with Pinecone for vector storage. Below, we will provide an in-depth overview of this class, including its purpose, parameters, and methods.
### 2.1 Class Definition
```python
class PineconeVector(BaseVector):
```
### 2.2 Parameters
The `PineconeVector` class accepts the following parameters during initialization:
- `api_key` (str): The API key for your Pinecone account.
- `index_name` (str): The name of the index to use.
- `environment` (str): The environment to use. Either "us-west1-gcp" or "us-east1-gcp".
- `project_name` (str, optional): The name of the project to use. Defaults to `None`.
- `index` (pinecone.Index, optional): The Pinecone index to use. Defaults to `None`.
### 2.3 Methods
The `PineconeVector` class provides several methods for interacting with Pinecone:
#### 2.3.1 `upsert_vector`
```python
def upsert_vector(
self,
vector: list[float],
vector_id: Optional[str] = None,
namespace: Optional[str] = None,
meta: Optional[dict] = None,
**kwargs
) -> str:
```
Upserts a vector into the index.
- `vector` (list[float]): The vector to upsert.
- `vector_id` (Optional[str]): An optional ID for the vector. If not provided, a unique ID will be generated.
- `namespace` (Optional[str]): An optional namespace for the vector.
- `meta` (Optional[dict]): An optional metadata dictionary associated with the vector.
- `**kwargs`: Additional keyword arguments.
#### 2.3.2 `load_entry`
```python
def load_entry(
self, vector_id: str, namespace: Optional[str] = None
) -> Optional[BaseVector.Entry]:
```
Loads a single vector from the index.
- `vector_id` (str): The ID of the vector to load.
- `namespace` (Optional[str]): An optional namespace for the vector.
#### 2.3.3 `load_entries`
```python
def load_entries(self, namespace: Optional[str] = None) -> list[BaseVector.Entry]:
```
Loads all vectors from the index.
- `namespace` (Optional[str]): An optional namespace for the vectors.
#### 2.3.4 `query`
```python
def query(
self,
query: str,
count: Optional[int] = None,
namespace: Optional[str] = None,
include_vectors: bool = False,
include_metadata=True,
**kwargs
) -> list[BaseVector.QueryResult]:
```
Queries the index for vectors similar to the given query string.
- `query` (str): The query string.
- `count` (Optional[int]): The maximum number of results to return. If not provided, a default value is used.
- `namespace` (Optional[str]): An optional namespace for the query.
- `include_vectors` (bool): Whether to include vectors in the query results.
- `include_metadata` (bool): Whether to include metadata in the query results.
- `**kwargs`: Additional keyword arguments.
#### 2.3.5 `create_index`
```python
def create_index(self, name: str, **kwargs) -> None:
```
Creates a new index.
- `name` (str): The name of the index to create.
- `**kwargs`: Additional keyword arguments.
---
## 3. Installation <a name="installation"></a>
To use the Swarms library and the `PineconeVector` class, you will need to install the library and its dependencies. Follow these steps to get started:
1. Install Swarms:
```bash
pip install swarms
```
2. Install Pinecone:
You will also need a Pinecone account and API key. Follow the instructions on the Pinecone website to create an account and obtain an API key.
3. Import the necessary modules in your Python code:
```python
from swarms.memory.vector_stores.pinecone import PineconeVector
```
Now you're ready to use the `PineconeVector` class to work with Pinecone for vector storage.
---
## 4. Usage <a name="usage"></a>
In this section, we will provide detailed examples of how to use the `PineconeVector` class for vector storage with Pinecone.
### 4.1 Creating a PineconeVector Instance <a name="creating-a-pineconevector-instance"></a>
To get started, you need to create an instance of the `PineconeVector` class. You will need your Pinecone API key, the name of the index you want to use, and the environment. You can also specify an optional project name if you have one.
```python
pv = PineconeVector(
api_key="your-api-key",
index_name="your-index-name",
environment="us-west1-gcp",
project_name="your-project-name"
)
```
### 4.2 Creating an Index <a name="creating-an-index"></a>
Before you can upsert vectors, you need to create an index in Pinecone. You can use the `create_index` method for this purpose.
```python
pv.create_index("your-index-name")
```
### 4.3 Upserting Vectors <a name="upserting-vectors"></a>
You can upsert vectors into the Pinecone index using the `upsert_vector` method. This method allows you to specify the vector, an optional vector ID, namespace, and metadata.
```python
vector = [0.1, 0.2, 0.3, 0.4]
vector_id = "unique-vector-id"
namespace = "your-namespace"
meta = {"key1": "value1", "key2": "value2"}
pv.upsert_vector(
vector=vector,
vector_id=vector_id,
namespace=namespace,
meta=meta
)
```
### 4.4 Querying the Index <a name="querying-the-index"></a>
You can query the Pinecone index to find vectors similar to a given query string using the `query` method. You can specify the query string, the maximum number of results to return, and other options.
```python
query_string = "your-query-string"
count = 10 # Maximum number of results to return
namespace = "your-namespace"
include_vectors = False # Set to True to include vectors in results
include_metadata = True
results = pv.query(
query=query_string,
count=count,
namespace=namespace,
include_vectors=include_vectors,
include_metadata=include_metadata
)
# Process the query results
for result in results:
vector_id = result.id
vector = result.vector
score = result.score
meta = result.meta
# Handle the results as needed
```
### 4.5 Loading an Entry <a name="loading-an-entry"></a>
You can load a single vector entry from the Pinecone index using the `load_entry` method. Provide the vector ID and an optional namespace.
```python
vector_id = "your-vector-id"
namespace = "your-namespace"
entry = pv.load_entry(vector_id=vector_id, namespace=namespace)
if entry is not None:
loaded_vector = entry.vector
loaded_meta = entry.meta
# Use the loaded vector and metadata
else:
# Vector not found
```
### 4.6 Loading Entries <a name="loading-entries"></a>
To load all vectors from the Pinecone index, you can use the `load_entries` method. You can also specify an optional namespace.
```python
namespace = "your-namespace"
entries = pv.load_entries(namespace=namespace)
# Process the loaded entries
for entry in entries:
vector_id = entry.id
vector = entry.vector
meta = entry.meta
# Handle the loaded entries as needed
```
---
## 5. Additional Information <a name="additional-information"></a>
In this section, we provide additional information and tips for using the `PineconeVector` class effectively.
- When upserting vectors, you can generate a unique vector ID using a hash of the vector's content to ensure uniqueness.
- Consider using namespaces to organize and categorize vectors within your Pinecone index.
- Pinecone provides powerful querying capabilities, so be sure to explore and leverage its features to retrieve relevant vectors efficiently.
- Keep your Pinecone API key secure and follow Pinecone's best practices for API key management.
---
## 6. References and Resources <a name="references-and-resources"></a>
Here are some references and resources for further information on Pinecone and Swarms:
- [Pinecone Website](https://www.pinecone.io/): Official Pinecone website for documentation and resources.
- [Pinecone Documentation](https://docs.pinecone.io/): Detailed documentation for Pinecone.
- [Swarms GitHub Repository](https://github.com/swarms): Swarms library on GitHub for updates and contributions.
---
This concludes the documentation for the Swarms library and the `PineconeVector` class. You now have a deep understanding of how to use Swarms with Pinecone for vector storage. If you have any further questions or need assistance, please refer to the provided references and resources. Happy coding!

@ -94,7 +94,9 @@ nav:
- swarms.structs:
- Overview: "swarms/structs/overview.md"
- Workflow: "swarms/structs/workflow.md"
- swarms.memory:
- PineconeVectorStoreStore: "swarms/memory/pinecone.md"
- PGVectorStore: "swarms/memory/pg.md"
- Examples:
- Overview: "examples/index.md"
- Agents:

@ -14,6 +14,7 @@ from swarms.swarms.orchestrate import Orchestrator
from swarms import swarms
from swarms import structs
from swarms import models
# from swarms.chunkers import chunkers
from swarms.workers.worker import Worker
from swarms import workers

@ -13,5 +13,5 @@ class MarkdownChunker(BaseChunker):
ChunkSeparator(". "),
ChunkSeparator("! "),
ChunkSeparator("? "),
ChunkSeparator(" ")
ChunkSeparator(" "),
]

@ -0,0 +1,3 @@
from swarms.memory.vector_stores.pinecone import PineconeVector
from swarms.memory.vector_stores.base import BaseVectorStore
from swarms.memory.vector_stores.pg import PgVectorVectorStore

@ -7,11 +7,10 @@ from swarms.utils.futures import execute_futures_dict
from griptape.artifacts import TextArtifact
@define
class BaseVectorStore(ABC):
"""
"""
""" """
DEFAULT_QUERY_COUNT = 5
@dataclass
@ -31,28 +30,31 @@ class BaseVectorStore(ABC):
embedding_driver: Any
futures_executor: futures.Executor = field(
default=Factory(lambda: futures.ThreadPoolExecutor()),
kw_only=True
default=Factory(lambda: futures.ThreadPoolExecutor()), kw_only=True
)
def upsert_text_artifacts(
self,
artifacts: dict[str, list[TextArtifact]],
meta: Optional[dict] = None,
**kwargs
self,
artifacts: dict[str, list[TextArtifact]],
meta: Optional[dict] = None,
**kwargs
) -> None:
execute_futures_dict({
namespace:
self.futures_executor.submit(self.upsert_text_artifact, a, namespace, meta, **kwargs)
for namespace, artifact_list in artifacts.items() for a in artifact_list
})
execute_futures_dict(
{
namespace: self.futures_executor.submit(
self.upsert_text_artifact, a, namespace, meta, **kwargs
)
for namespace, artifact_list in artifacts.items()
for a in artifact_list
}
)
def upsert_text_artifact(
self,
artifact: TextArtifact,
namespace: Optional[str] = None,
meta: Optional[dict] = None,
**kwargs
self,
artifact: TextArtifact,
namespace: Optional[str] = None,
meta: Optional[dict] = None,
**kwargs
) -> str:
if not meta:
meta = {}
@ -65,20 +67,16 @@ class BaseVectorStore(ABC):
vector = artifact.generate_embedding(self.embedding_driver)
return self.upsert_vector(
vector,
vector_id=artifact.id,
namespace=namespace,
meta=meta,
**kwargs
vector, vector_id=artifact.id, namespace=namespace, meta=meta, **kwargs
)
def upsert_text(
self,
string: str,
vector_id: Optional[str] = None,
namespace: Optional[str] = None,
meta: Optional[dict] = None,
**kwargs
self,
string: str,
vector_id: Optional[str] = None,
namespace: Optional[str] = None,
meta: Optional[dict] = None,
**kwargs
) -> str:
return self.upsert_vector(
self.embedding_driver.embed_string(string),
@ -90,12 +88,12 @@ class BaseVectorStore(ABC):
@abstractmethod
def upsert_vector(
self,
vector: list[float],
vector_id: Optional[str] = None,
namespace: Optional[str] = None,
meta: Optional[dict] = None,
**kwargs
self,
vector: list[float],
vector_id: Optional[str] = None,
namespace: Optional[str] = None,
meta: Optional[dict] = None,
**kwargs
) -> str:
...
@ -109,11 +107,11 @@ class BaseVectorStore(ABC):
@abstractmethod
def query(
self,
query: str,
count: Optional[int] = None,
namespace: Optional[str] = None,
include_vectors: bool = False,
**kwargs
self,
query: str,
count: Optional[int] = None,
namespace: Optional[str] = None,
include_vectors: bool = False,
**kwargs
) -> list[QueryResult]:
...

@ -2,7 +2,7 @@ import uuid
from typing import Optional
from attr import define, field, Factory
from dataclasses import dataclass
from swarms.memory.vector_stores.base import BaseVectorStoreDriver
from swarms.memory.vector_stores.base import BaseVectorStore
from sqlalchemy.engine import Engine
from sqlalchemy import create_engine, Column, String, JSON
from sqlalchemy.ext.declarative import declarative_base
@ -12,7 +12,7 @@ from pgvector.sqlalchemy import Vector
@define
class PgVectorVectorStore(BaseVector):
class PgVectorVectorStore(BaseVectorStore):
"""A vector store driver to Postgres using the PGVector extension.
Attributes:
@ -20,13 +20,77 @@ class PgVectorVectorStore(BaseVector):
create_engine_params: Additional configuration params passed when creating the database connection.
engine: An optional sqlalchemy Postgres engine to use.
table_name: Optionally specify the name of the table to use to store vectors.
Methods:
upsert_vector(vector: list[float], vector_id: Optional[str] = None, namespace: Optional[str] = None, meta: Optional[dict] = None, **kwargs) -> str:
Upserts a vector into the index.
load_entry(vector_id: str, namespace: Optional[str] = None) -> Optional[BaseVectorStore.Entry]:
Loads a single vector from the index.
load_entries(namespace: Optional[str] = None) -> list[BaseVectorStore.Entry]:
Loads all vectors from the index.
query(query: str, count: Optional[int] = BaseVectorStore.DEFAULT_QUERY_COUNT, namespace: Optional[str] = None, include_vectors: bool = False, distance_metric: str = "cosine_distance", **kwargs) -> list[BaseVectorStore.QueryResult]:
Queries the index for vectors similar to the given query string.
setup(create_schema: bool = True, install_uuid_extension: bool = True, install_vector_extension: bool = True) -> None:
Provides a mechanism to initialize the database schema and extensions.
Usage:
>>> from swarms.memory.vector_stores.pgvector import PgVectorVectorStore
>>> from swarms.utils.embeddings import USEEmbedding
>>> from swarms.utils.hash import str_to_hash
>>> from swarms.utils.dataframe import dataframe_to_hash
>>> import pandas as pd
>>>
>>> # Create a new PgVectorVectorStore instance:
>>> pv = PgVectorVectorStore(
>>> connection_string="postgresql://postgres:password@localhost:5432/postgres",
>>> table_name="your-table-name"
>>> )
>>> # Initialize the database schema and extensions:
>>> pv.setup()
>>> # Create a new USEEmbedding instance:
>>> use = USEEmbedding()
>>> # Create a new dataframe:
>>> df = pd.DataFrame({
>>> "text": [
>>> "This is a test",
>>> "This is another test",
>>> "This is a third test"
>>> ]
>>> })
>>> # Embed the dataframe:
>>> df["embedding"] = df["text"].apply(use.embed_string)
>>> # Upsert the dataframe into the index:
>>> pv.upsert_vector(
>>> vector=df["embedding"].tolist(),
>>> vector_id=dataframe_to_hash(df),
>>> namespace="your-namespace"
>>> )
>>> # Query the index:
>>> pv.query(
>>> query="This is a test",
>>> count=10,
>>> namespace="your-namespace"
>>> )
>>> # Load a single entry from the index:
>>> pv.load_entry(
>>> vector_id=dataframe_to_hash(df),
>>> namespace="your-namespace"
>>> )
>>> # Load all entries from the index:
>>> pv.load_entries(
>>> namespace="your-namespace"
>>> )
"""
connection_string: Optional[str] = field(default=None, kw_only=True)
create_engine_params: dict = field(factory=dict, kw_only=True)
engine: Optional[Engine] = field(default=None, kw_only=True)
table_name: str = field(kw_only=True)
_model: any = field(default=Factory(lambda self: self.default_vector_model(), takes_self=True))
_model: any = field(
default=Factory(lambda self: self.default_vector_model(), takes_self=True)
)
@connection_string.validator
def validate_connection_string(self, _, connection_string: Optional[str]) -> None:
@ -39,7 +103,9 @@ class PgVectorVectorStore(BaseVector):
raise ValueError("An engine or connection string is required")
if not connection_string.startswith("postgresql://"):
raise ValueError("The connection string must describe a Postgres database connection")
raise ValueError(
"The connection string must describe a Postgres database connection"
)
@engine.validator
def validate_engine(self, _, engine: Optional[Engine]) -> None:
@ -56,7 +122,9 @@ class PgVectorVectorStore(BaseVector):
If not, a connection string is used to create a new database connection here.
"""
if self.engine is None:
self.engine = create_engine(self.connection_string, **self.create_engine_params)
self.engine = create_engine(
self.connection_string, **self.create_engine_params
)
def setup(
self,
@ -96,19 +164,23 @@ class PgVectorVectorStore(BaseVector):
return str(obj.id)
def load_entry(
    self, vector_id: str, namespace: Optional[str] = None
) -> Optional[BaseVectorStore.Entry]:
    """Retrieves a specific vector entry from the collection based on its identifier.

    Args:
        vector_id: Primary-key value of the row to load.
        namespace: Accepted for interface compatibility; the lookup is done
            by primary key only, so this value is not used.

    Returns:
        The matching entry, or ``None`` when no row has the given id.
    """
    with Session(self.engine) as session:
        result = session.get(self._model, vector_id)

        # Session.get yields None for an unknown primary key; surface that
        # to the caller instead of failing on attribute access below.
        if result is None:
            return None

        return BaseVectorStore.Entry(
            # Stringify the id so it matches what load_entries and query return.
            id=str(result.id),
            vector=result.vector,
            namespace=result.namespace,
            meta=result.meta,
        )
def load_entries(self, namespace: Optional[str] = None) -> list[BaseVectorStoreDriver.Entry]:
def load_entries(
self, namespace: Optional[str] = None
) -> list[BaseVectorStore.Entry]:
"""Retrieves all vector entries from the collection, optionally filtering to only
those that match the provided namespace.
"""
@ -120,7 +192,7 @@ class PgVectorVectorStore(BaseVector):
results = query.all()
return [
BaseVectorStoreDriver.Entry(
BaseVectorStore.Entry(
id=str(result.id),
vector=result.vector,
namespace=result.namespace,
@ -132,12 +204,12 @@ class PgVectorVectorStore(BaseVector):
def query(
self,
query: str,
count: Optional[int] = BaseVectorStoreDriver.DEFAULT_QUERY_COUNT,
count: Optional[int] = BaseVectorStore.DEFAULT_QUERY_COUNT,
namespace: Optional[str] = None,
include_vectors: bool = False,
distance_metric: str = "cosine_distance",
**kwargs
) -> list[BaseVectorStoreDriver.QueryResult]:
) -> list[BaseVectorStore.QueryResult]:
"""Performs a search on the collection to find vectors similar to the provided input vector,
optionally filtering to only those that match the provided namespace.
"""
@ -167,7 +239,7 @@ class PgVectorVectorStore(BaseVector):
results = query.limit(count).all()
return [
BaseVectorStoreDriver.QueryResult(
BaseVectorStore.QueryResult(
id=str(result[0].id),
vector=result[0].vector if include_vectors else None,
score=result[1],
@ -184,7 +256,13 @@ class PgVectorVectorStore(BaseVector):
class VectorModel(Base):
__tablename__ = self.table_name
id = Column(UUID(as_uuid=True), primary_key=True, default=uuid.uuid4, unique=True, nullable=False)
id = Column(
UUID(as_uuid=True),
primary_key=True,
default=uuid.uuid4,
unique=True,
nullable=False,
)
vector = Column(Vector())
namespace = Column(String)
meta = Column(JSON)

@ -6,9 +6,9 @@ from swarms.utils.hash import str_to_hash
@define
class PineconeVector(BaseVector):
class PineconeVectorStoreStore(BaseVector):
"""
PineconeVector is a vector storage driver that uses Pinecone as the underlying storage engine.
PineconeVectorStore is a vector storage driver that uses Pinecone as the underlying storage engine.
Pinecone is a vector database that allows you to store, search, and retrieve high-dimensional vectors with
blazing speed and low latency. It is a managed service that is easy to use and scales effortlessly, so you can
@ -34,14 +34,14 @@ class PineconeVector(BaseVector):
Creates a new index.
Usage:
>>> from swarms.memory.vector_stores.pinecone import PineconeVector
>>> from swarms.memory.vector_stores.pinecone import PineconeVectorStore
>>> from swarms.utils.embeddings import USEEmbedding
>>> from swarms.utils.hash import str_to_hash
>>> from swarms.utils.dataframe import dataframe_to_hash
>>> import pandas as pd
>>>
>>> # Create a new PineconeVector instance:
>>> pv = PineconeVector(
>>> # Create a new PineconeVectorStore instance:
>>> pv = PineconeVectorStore(
>>> api_key="your-api-key",
>>> index_name="your-index-name",
>>> environment="us-west1-gcp",
@ -85,6 +85,7 @@ class PineconeVector(BaseVector):
"""
api_key: str = field(kw_only=True)
index_name: str = field(kw_only=True)
environment: str = field(kw_only=True)
@ -92,33 +93,36 @@ class PineconeVector(BaseVector):
index: pinecone.Index = field(init=False)
def __attrs_post_init__(self) -> None:
    """Initialize the Pinecone client and bind ``self.index``.

    Runs after attrs has populated the instance: configures the Pinecone
    client from ``api_key``, ``environment`` and ``project_name``, then
    creates the ``pinecone.Index`` handle for ``index_name``.
    """
    pinecone.init(
        api_key=self.api_key,
        environment=self.environment,
        project_name=self.project_name,
    )

    self.index = pinecone.Index(self.index_name)
def upsert_vector(
    self,
    vector: list[float],
    vector_id: Optional[str] = None,
    namespace: Optional[str] = None,
    meta: Optional[dict] = None,
    **kwargs
) -> str:
    """Upsert a single vector into the Pinecone index.

    Args:
        vector: The embedding values to store.
        vector_id: Identifier for the vector. When empty or omitted, an id
            is derived by hashing the string form of ``vector``.
        namespace: Optional namespace passed to the index upsert call.
        meta: Optional metadata stored with the vector.
        **kwargs: Extra keyword arguments forwarded to ``index.upsert``;
            a ``namespace`` key here overrides the ``namespace`` argument.

    Returns:
        The id under which the vector was upserted.
    """
    vector_id = vector_id if vector_id else str_to_hash(str(vector))

    # kwargs is merged last, so caller-supplied options take precedence.
    params = {"namespace": namespace} | kwargs

    self.index.upsert([(vector_id, vector, meta)], **params)

    return vector_id
def load_entry(self, vector_id: str, namespace: Optional[str] = None) -> Optional[BaseVector.Entry]:
def load_entry(
self, vector_id: str, namespace: Optional[str] = None
) -> Optional[BaseVector.Entry]:
"""Load a single entry by its vector id."""
result = self.index.fetch(ids=[vector_id], namespace=namespace).to_dict()
vectors = list(result["vectors"].values())
@ -129,12 +133,13 @@ class PineconeVector(BaseVector):
id=vector["id"],
meta=vector["metadata"],
vector=vector["values"],
namespace=result["namespace"]
namespace=result["namespace"],
)
else:
return None
def load_entries(self, namespace: Optional[str] = None) -> list[BaseVector.Entry]:
"""Load entries from the index (capped at 10,000 results; see the note below)."""
# This is a hacky way to query up to 10,000 values from Pinecone. Waiting on an official API for fetching
# all values from a namespace:
# https://community.pinecone.io/t/is-there-a-way-to-query-all-the-vectors-and-or-metadata-from-a-namespace/797/5
@ -143,7 +148,7 @@ class PineconeVector(BaseVector):
self.embedding_driver.embed_string(""),
top_k=10000,
include_metadata=True,
namespace=namespace
namespace=namespace,
)
return [
@ -151,28 +156,29 @@ class PineconeVector(BaseVector):
id=r["id"],
vector=r["values"],
meta=r["metadata"],
namespace=results["namespace"]
namespace=results["namespace"],
)
for r in results["matches"]
]
def query(
self,
query: str,
count: Optional[int] = None,
namespace: Optional[str] = None,
include_vectors: bool = False,
# PineconeVectorStorageDriver-specific params:
include_metadata=True,
**kwargs
self,
query: str,
count: Optional[int] = None,
namespace: Optional[str] = None,
include_vectors: bool = False,
# PineconeVectorStore-specific params:
include_metadata=True,
**kwargs
) -> list[BaseVector.QueryResult]:
"""Query vectors"""
vector = self.embedding_driver.embed_string(query)
params = {
"top_k": count if count else BaseVector.DEFAULT_QUERY_COUNT,
"namespace": namespace,
"include_values": include_vectors,
"include_metadata": include_metadata
"include_metadata": include_metadata,
} | kwargs
results = self.index.query(vector, **params)
@ -183,15 +189,13 @@ class PineconeVector(BaseVector):
vector=r["values"],
score=r["score"],
meta=r["metadata"],
namespace=results["namespace"]
namespace=results["namespace"],
)
for r in results["matches"]
]
def create_index(self, name: str, **kwargs) -> None:
    """Create a new Pinecone index.

    Args:
        name: Name of the index to create.
        **kwargs: Extra keyword arguments forwarded to
            ``pinecone.create_index``. They are merged last, so a
            ``dimension`` (or ``name``) given here overrides the default.

    The ``dimension`` defaults to ``self.embedding_driver.dimensions``.
    """
    params = {"name": name, "dimension": self.embedding_driver.dimensions} | kwargs

    pinecone.create_index(**params)

@ -1,8 +1,12 @@
import pandas as pd
import hashlib
def dataframe_to_hash(dataframe: pd.DataFrame) -> str:
    """Return a SHA-256 hex digest for *dataframe*.

    Each row (index included) is first hashed by
    ``pd.util.hash_pandas_object``; the resulting array of row hashes is
    then fed to SHA-256.

    Args:
        dataframe: The frame to fingerprint.

    Returns:
        The hexadecimal SHA-256 digest as a string.
    """
    return hashlib.sha256(
        pd.util.hash_pandas_object(dataframe, index=True).values
    ).hexdigest()
def str_to_hash(text: str, hash_algorithm: str = "sha256") -> str:
m = hashlib.new(hash_algorithm)

Loading…
Cancel
Save