From b0347ac296ff75c662e424ca010e325b085b1684 Mon Sep 17 00:00:00 2001 From: evelynmitchell Date: Sun, 3 Dec 2023 15:19:22 -0700 Subject: [PATCH 01/32] pydantic bump --- swarms/memory/schemas.py | 36 ++++++++++++------------- swarms/models/anthropic.py | 18 ++++++------- swarms/models/cohere_chat.py | 11 +++----- swarms/models/dalle3.py | 5 ++-- swarms/models/eleven_labs.py | 5 ++-- swarms/models/fastvit.py | 2 ++ swarms/models/kosmos2.py | 2 ++ swarms/models/openai_embeddings.py | 16 +++++------ swarms/models/openai_function_caller.py | 5 ++-- swarms/models/openai_models.py | 22 ++++++++------- swarms/models/palm.py | 4 ++- swarms/models/ssd_1b.py | 5 ++-- swarms/models/timm.py | 7 ++--- swarms/tools/tool.py | 8 +++--- swarms/utils/serializable.py | 6 ++--- 15 files changed, 77 insertions(+), 75 deletions(-) diff --git a/swarms/memory/schemas.py b/swarms/memory/schemas.py index 9147a909..589a80ae 100644 --- a/swarms/memory/schemas.py +++ b/swarms/memory/schemas.py @@ -12,7 +12,7 @@ class TaskInput(BaseModel): description=( "The input parameters for the task. Any value is allowed." ), - example='{\n"debug": false,\n"mode": "benchmarks"\n}', + examples=['{\n"debug": false,\n"mode": "benchmarks"\n}'], ) @@ -20,17 +20,17 @@ class Artifact(BaseModel): artifact_id: str = Field( ..., description="Id of the artifact", - example="b225e278-8b4c-4f99-a696-8facf19f0e56", + examples=["b225e278-8b4c-4f99-a696-8facf19f0e56"], ) file_name: str = Field( - ..., description="Filename of the artifact", example="main.py" + ..., description="Filename of the artifact", examples=["main.py"] ) relative_path: Optional[str] = Field( None, description=( "Relative path of the artifact in the agent's workspace" ), - example="python/code/", + examples=["python/code/"], ) @@ -41,7 +41,7 @@ class ArtifactUpload(BaseModel): description=( "Relative path of the artifact in the agent's workspace" ), - example="python/code/", + examples=["python/code/"], ) @@ -52,7 +52,7 @@ class StepInput(BaseModel): "Input parameters for the task step. Any value is" " allowed." ), - example='{\n"file_to_refactor": "models.py"\n}', + examples=['{\n"file_to_refactor": "models.py"\n}'], ) @@ -63,7 +63,7 @@ class StepOutput(BaseModel): "Output that the task step has produced. Any value is" " allowed." ), - example='{\n"tokens": 7894,\n"estimated_cost": "0,24$"\n}', + examples=['{\n"tokens": 7894,\n"estimated_cost": "0,24$"\n}'], ) @@ -71,9 +71,9 @@ class TaskRequestBody(BaseModel): input: Optional[str] = Field( None, description="Input prompt for the task.", - example=( + examples=[( "Write the words you receive to the file 'output.txt'." - ), + )], ) additional_input: Optional[TaskInput] = None @@ -82,15 +82,15 @@ class Task(TaskRequestBody): task_id: str = Field( ..., description="The ID of the task.", - example="50da533e-3904-4401-8a07-c49adf88b5eb", + examples=["50da533e-3904-4401-8a07-c49adf88b5eb"], ) artifacts: List[Artifact] = Field( [], description="A list of artifacts that the task has produced.", - example=[ + examples=[[ "7a49f31c-f9c6-4346-a22c-e32bc5af4d8e", "ab7b4091-2560-4692-a4fe-d831ea3ca7d6", - ], + ]], ) @@ -98,7 +98,7 @@ class StepRequestBody(BaseModel): input: Optional[str] = Field( None, description="Input prompt for the step.", - example="Washington", + examples=["Washington"], ) additional_input: Optional[StepInput] = None @@ -113,17 +113,17 @@ class Step(StepRequestBody): task_id: str = Field( ..., description="The ID of the task this step belongs to.", - example="50da533e-3904-4401-8a07-c49adf88b5eb", + examples=["50da533e-3904-4401-8a07-c49adf88b5eb"], ) step_id: str = Field( ..., description="The ID of the task step.", - example="6bb1801a-fd80-45e8-899a-4dd723cc602e", + examples=["6bb1801a-fd80-45e8-899a-4dd723cc602e"], ) name: Optional[str] = Field( None, description="The name of the task step.", - example="Write to file", + examples=["Write to file"], ) status: Status = Field( ..., description="The status of the task step." @@ -131,11 +131,11 @@ class Step(StepRequestBody): output: Optional[str] = Field( None, description="Output of the task step.", - example=( + examples=[( "I am going to use the write_to_file command and write" " Washington to a file called output.txt" " Dict: extra = values.get("model_kwargs", {}) all_required_field_names = get_pydantic_field_names(cls) @@ -269,7 +270,8 @@ class _AnthropicCommon(BaseLanguageModel): ) return values - @root_validator() + @model_validator() + @classmethod def validate_environment(cls, values: Dict) -> Dict: """Validate that api key and python package exists in environment.""" values["anthropic_api_key"] = convert_to_secret_str( @@ -376,14 +378,10 @@ class Anthropic(LLM, _AnthropicCommon): prompt = f"{anthropic.HUMAN_PROMPT} {prompt}{anthropic.AI_PROMPT}" response = model(prompt) """ + model_config = ConfigDict(populate_by_name=True, arbitrary_types_allowed=True) - class Config: - """Configuration for this pydantic object.""" - - allow_population_by_field_name = True - arbitrary_types_allowed = True - - @root_validator() + @model_validator() + @classmethod def raise_warning(cls, values: Dict) -> Dict: """Raise warning that this class is deprecated.""" warnings.warn( diff --git a/swarms/models/cohere_chat.py b/swarms/models/cohere_chat.py index 1a31d82e..efd8728a 100644 --- a/swarms/models/cohere_chat.py +++ b/swarms/models/cohere_chat.py @@ -16,7 +16,7 @@ from langchain.callbacks.manager import ( from langchain.llms.base import LLM from langchain.llms.utils import enforce_stop_tokens from langchain.load.serializable import Serializable -from pydantic import Extra, Field, root_validator +from pydantic import model_validator, ConfigDict, Field from langchain.utils import get_from_dict_or_env logger = logging.getLogger(__name__) @@ -85,7 +85,8 @@ class BaseCohere(Serializable): user_agent: str = "langchain" """Identifier for the application making the request.""" - @root_validator() + @model_validator() + @classmethod def validate_environment(cls, values: Dict) -> Dict: """Validate that api key and python package exists in environment.""" try: @@ -145,11 +146,7 @@ class Cohere(LLM, BaseCohere): max_retries: int = 10 """Maximum number of retries to make when generating.""" - - class Config: - """Configuration for this pydantic object.""" - - extra = Extra.forbid + model_config = ConfigDict(extra="forbid") @property def _default_params(self) -> Dict[str, Any]: diff --git a/swarms/models/dalle3.py b/swarms/models/dalle3.py index 40f63418..17790c74 100644 --- a/swarms/models/dalle3.py +++ b/swarms/models/dalle3.py @@ -13,7 +13,7 @@ from cachetools import TTLCache from dotenv import load_dotenv from openai import OpenAI from PIL import Image -from pydantic import validator +from pydantic import field_validator from termcolor import colored load_dotenv() @@ -92,7 +92,8 @@ class Dalle3: arbitrary_types_allowed = True - @validator("max_retries", "time_seconds") + @field_validator("max_retries", "time_seconds") + @classmethod def must_be_positive(cls, value): if value <= 0: raise ValueError("Must be positive") diff --git a/swarms/models/eleven_labs.py b/swarms/models/eleven_labs.py index 2d55e864..759c65bb 100644 --- a/swarms/models/eleven_labs.py +++ b/swarms/models/eleven_labs.py @@ -3,7 +3,7 @@ from enum import Enum from typing import Any, Dict, Union from langchain.utils import get_from_dict_or_env -from pydantic import root_validator +from pydantic import model_validator from swarms.tools.tool import BaseTool @@ -59,7 +59,8 @@ class ElevenLabsText2SpeechTool(BaseTool): " Italian, French, Portuguese, and Hindi. " ) - @root_validator(pre=True) + @model_validator(mode="before") + @classmethod def validate_environment(cls, values: Dict) -> Dict: """Validate that api key exists in environment.""" _ = get_from_dict_or_env( diff --git a/swarms/models/fastvit.py b/swarms/models/fastvit.py index a6fc31f8..f3b60587 100644 --- a/swarms/models/fastvit.py +++ b/swarms/models/fastvit.py @@ -20,6 +20,8 @@ class ClassificationResult(BaseModel): class_id: List[StrictInt] confidence: List[StrictFloat] + # TODO[pydantic]: We couldn't refactor the `validator`, please replace it by `field_validator` manually. + # Check https://docs.pydantic.dev/dev-v2/migration/#changes-to-validators for more information. @validator("class_id", "confidence", pre=True, each_item=True) def check_list_contents(cls, v): assert isinstance(v, int) or isinstance( diff --git a/swarms/models/kosmos2.py b/swarms/models/kosmos2.py index 9a9a0de3..d251ea23 100644 --- a/swarms/models/kosmos2.py +++ b/swarms/models/kosmos2.py @@ -20,6 +20,8 @@ class Detections(BaseModel): ), "All fields must have the same length." return values + # TODO[pydantic]: We couldn't refactor the `validator`, please replace it by `field_validator` manually. + # Check https://docs.pydantic.dev/dev-v2/migration/#changes-to-validators for more information. @validator( "xyxy", "class_id", "confidence", pre=True, each_item=True ) diff --git a/swarms/models/openai_embeddings.py b/swarms/models/openai_embeddings.py index 0cbbdbee..3265a141 100644 --- a/swarms/models/openai_embeddings.py +++ b/swarms/models/openai_embeddings.py @@ -16,7 +16,7 @@ from typing import ( ) import numpy as np -from pydantic import BaseModel, Extra, Field, root_validator +from pydantic import model_validator, ConfigDict, BaseModel, Field from tenacity import ( AsyncRetrying, before_sleep_log, @@ -186,7 +186,7 @@ class OpenAIEmbeddings(BaseModel, Embeddings): """ - client: Any #: :meta private: + client: Any = None #: :meta private: model: str = "text-embedding-ada-002" deployment: str = model # to support Azure OpenAI Service custom deployment names openai_api_version: Optional[str] = None @@ -227,13 +227,10 @@ class OpenAIEmbeddings(BaseModel, Embeddings): """Whether to show a progress bar when embedding.""" model_kwargs: Dict[str, Any] = Field(default_factory=dict) """Holds any model parameters valid for `create` call not explicitly specified.""" + model_config = ConfigDict(extra="forbid") - class Config: - """Configuration for this pydantic object.""" - - extra = Extra.forbid - - @root_validator(pre=True) + @model_validator(mode="before") + @classmethod def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]: """Build extra kwargs from additional params that were passed in.""" all_required_field_names = get_pydantic_field_names(cls) @@ -264,7 +261,8 @@ class OpenAIEmbeddings(BaseModel, Embeddings): values["model_kwargs"] = extra return values - @root_validator() + @model_validator() + @classmethod def validate_environment(cls, values: Dict) -> Dict: """Validate that api key and python package exists in environment.""" values["openai_api_key"] = get_from_dict_or_env( diff --git a/swarms/models/openai_function_caller.py b/swarms/models/openai_function_caller.py index 6542e457..feb04387 100644 --- a/swarms/models/openai_function_caller.py +++ b/swarms/models/openai_function_caller.py @@ -2,7 +2,7 @@ from typing import Any, Dict, List, Optional, Union import openai import requests -from pydantic import BaseModel, validator +from pydantic import field_validator, BaseModel from tenacity import ( retry, stop_after_attempt, @@ -78,7 +78,8 @@ class FunctionSpecification(BaseModel): parameters: Dict[str, Any] required: Optional[List[str]] = None - @validator("parameters") + @field_validator("parameters") + @classmethod def check_parameters(cls, params): if not isinstance(params, dict): raise ValueError("Parameters must be a dictionary.") diff --git a/swarms/models/openai_models.py b/swarms/models/openai_models.py index 14332ff2..12830cec 100644 --- a/swarms/models/openai_models.py +++ b/swarms/models/openai_models.py @@ -37,6 +37,7 @@ from langchain.utils.utils import build_extra_kwargs from importlib.metadata import version from packaging.version import parse +from pydantic import model_validator, ConfigDict logger = logging.getLogger(__name__) @@ -247,13 +248,10 @@ class BaseOpenAI(BaseLLM): """Initialize the OpenAI object.""" data.get("model_name", "") return super().__new__(cls) + model_config = ConfigDict(populate_by_name=True) - class Config: - """Configuration for this pydantic object.""" - - allow_population_by_field_name = True - - @root_validator(pre=True) + @model_validator(mode="before") + @classmethod def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]: """Build extra kwargs from additional params that were passed in.""" all_required_field_names = get_pydantic_field_names(cls) @@ -263,7 +261,8 @@ class BaseOpenAI(BaseLLM): ) return values - @root_validator() + @model_validator() + @classmethod def validate_environment(cls, values: Dict) -> Dict: """Validate that api key and python package exists in environment.""" values["openai_api_key"] = get_from_dict_or_env( @@ -758,7 +757,8 @@ class AzureOpenAI(BaseOpenAI): openai_api_type: str = "" openai_api_version: str = "" - @root_validator() + @model_validator() + @classmethod def validate_azure_settings(cls, values: Dict) -> Dict: values["openai_api_version"] = get_from_dict_or_env( values, @@ -847,7 +847,8 @@ class OpenAIChat(BaseLLM): disallowed_special: Union[Literal["all"], Collection[str]] = "all" """Set of special tokens that are not allowed。""" - @root_validator(pre=True) + @model_validator(mode="before") + @classmethod def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]: """Build extra kwargs from additional params that were passed in.""" all_required_field_names = { @@ -865,7 +866,8 @@ class OpenAIChat(BaseLLM): values["model_kwargs"] = extra return values - @root_validator() + @model_validator() + @classmethod def validate_environment(cls, values: Dict) -> Dict: """Validate that api key and python package exists in environment.""" openai_api_key = get_from_dict_or_env( diff --git a/swarms/models/palm.py b/swarms/models/palm.py index d61d4856..e016a776 100644 --- a/swarms/models/palm.py +++ b/swarms/models/palm.py @@ -15,6 +15,7 @@ from tenacity import ( stop_after_attempt, wait_exponential, ) +from pydantic import model_validator logger = logging.getLogger(__name__) @@ -104,7 +105,8 @@ class GooglePalm(BaseLLM, BaseModel): """Number of chat completions to generate for each prompt. Note that the API may not return the full n completions if duplicates are generated.""" - @root_validator() + @model_validator() + @classmethod def validate_environment(cls, values: Dict) -> Dict: """Validate api key, python package exists.""" google_api_key = get_from_dict_or_env( diff --git a/swarms/models/ssd_1b.py b/swarms/models/ssd_1b.py index d3b9086b..9a905bd4 100644 --- a/swarms/models/ssd_1b.py +++ b/swarms/models/ssd_1b.py @@ -9,7 +9,7 @@ import backoff import torch from diffusers import StableDiffusionXLPipeline from PIL import Image -from pydantic import validator +from pydantic import field_validator from termcolor import colored from cachetools import TTLCache @@ -72,7 +72,8 @@ class SSD1B: arbitrary_types_allowed = True - @validator("max_retries", "time_seconds") + @field_validator("max_retries", "time_seconds") + @classmethod def must_be_positive(cls, value): if value <= 0: raise ValueError("Must be positive") diff --git a/swarms/models/timm.py b/swarms/models/timm.py index d1c42165..8dec0bc9 100644 --- a/swarms/models/timm.py +++ b/swarms/models/timm.py @@ -2,17 +2,14 @@ from typing import List import timm import torch -from pydantic import BaseModel +from pydantic import ConfigDict, BaseModel class TimmModelInfo(BaseModel): model_name: str pretrained: bool in_chans: int - - class Config: - # Use strict typing for all fields - strict = True + model_config = ConfigDict(strict=True) class TimmModel: diff --git a/swarms/tools/tool.py b/swarms/tools/tool.py index 1029a183..ba7752bd 100644 --- a/swarms/tools/tool.py +++ b/swarms/tools/tool.py @@ -30,11 +30,10 @@ from langchain.callbacks.manager import ( from langchain.load.serializable import Serializable from pydantic import ( - BaseModel, + model_validator, BaseModel, Extra, Field, create_model, - root_validator, validate_arguments, ) from langchain.schema.runnable import ( @@ -192,6 +191,8 @@ class ChildTool(BaseTool): ] = False """Handle the content of the ToolException thrown.""" + # TODO[pydantic]: The `Config` class inherits from another class, please create the `model_config` manually. + # Check https://docs.pydantic.dev/dev-v2/migration/#changes-to-config for more information. class Config(Serializable.Config): """Configuration for this pydantic object.""" @@ -276,7 +277,8 @@ class ChildTool(BaseTool): } return tool_input - @root_validator() + @model_validator() + @classmethod def raise_deprecation(cls, values: Dict) -> Dict: """Raise deprecation warning if callback_manager is used.""" if values.get("callback_manager") is not None: diff --git a/swarms/utils/serializable.py b/swarms/utils/serializable.py index de9444ef..3cc3a5f6 100644 --- a/swarms/utils/serializable.py +++ b/swarms/utils/serializable.py @@ -1,7 +1,7 @@ from abc import ABC from typing import Any, Dict, List, Literal, TypedDict, Union, cast -from pydantic import BaseModel, PrivateAttr +from pydantic import ConfigDict, BaseModel, PrivateAttr class BaseSerialized(TypedDict): @@ -64,9 +64,7 @@ class Serializable(BaseModel, ABC): constructor. """ return {} - - class Config: - extra = "ignore" + model_config = ConfigDict(extra="ignore") _lc_kwargs = PrivateAttr(default_factory=dict) From cfb08bb11a86ba23cf7fd2c0d003022ab4d55e38 Mon Sep 17 00:00:00 2001 From: evelynmitchell Date: Sun, 3 Dec 2023 16:05:20 -0700 Subject: [PATCH 02/32] bump pydantic to 2.x --- pyproject.toml | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 7069a7c9..6a78c1b1 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -52,7 +52,7 @@ ratelimit = "*" beautifulsoup4 = "*" cohere = "*" huggingface-hub = "*" -pydantic = "1.10.12" +pydantic = "2.*" tenacity = "*" Pillow = "*" chromadb = "*" diff --git a/requirements.txt b/requirements.txt index 576650cd..378b2f05 100644 --- a/requirements.txt +++ b/requirements.txt @@ -17,7 +17,7 @@ faiss-cpu openai==0.28.0 attrs datasets -pydantic==1.10.12 +pydantic>2 soundfile huggingface-hub google-generativeai From f79f7bbdd827838fbaf5f270a9e28aed327d32f6 Mon Sep 17 00:00:00 2001 From: evelynmitchell Date: Sun, 3 Dec 2023 17:07:26 -0700 Subject: [PATCH 03/32] remove pydantic from tools.py --- swarms/tools/tool.py | 76 +++++--------------------------------------- 1 file changed, 8 insertions(+), 68 deletions(-) diff --git a/swarms/tools/tool.py b/swarms/tools/tool.py index ba7752bd..6248d7fd 100644 --- a/swarms/tools/tool.py +++ b/swarms/tools/tool.py @@ -29,13 +29,7 @@ from langchain.callbacks.manager import ( ) from langchain.load.serializable import Serializable -from pydantic import ( - model_validator, BaseModel, - Extra, - Field, - create_model, - validate_arguments, -) + from langchain.schema.runnable import ( Runnable, RunnableConfig, @@ -47,62 +41,9 @@ class SchemaAnnotationError(TypeError): """Raised when 'args_schema' is missing or has an incorrect type annotation.""" -def _create_subset_model( - name: str, model: BaseModel, field_names: list -) -> Type[BaseModel]: - """Create a pydantic model with only a subset of model's fields.""" - fields = {} - for field_name in field_names: - field = model.__fields__[field_name] - fields[field_name] = (field.outer_type_, field.field_info) - return create_model(name, **fields) # type: ignore - - -def _get_filtered_args( - inferred_model: Type[BaseModel], - func: Callable, -) -> dict: - """Get the arguments from a function's signature.""" - schema = inferred_model.schema()["properties"] - valid_keys = signature(func).parameters - return { - k: schema[k] - for k in valid_keys - if k not in ("run_manager", "callbacks") - } - - -class _SchemaConfig: - """Configuration for the pydantic model.""" - extra: Any = Extra.forbid - arbitrary_types_allowed: bool = True -def create_schema_from_function( - model_name: str, - func: Callable, -) -> Type[BaseModel]: - """Create a pydantic schema from a function's signature. - Args: - model_name: Name to assign to the generated pydandic schema - func: Function to generate the schema from - Returns: - A pydantic model with the same arguments as the function - """ - # https://docs.pydantic.dev/latest/usage/validation_decorator/ - validated = validate_arguments(func, config=_SchemaConfig) # type: ignore - inferred_model = validated.model # type: ignore - if "run_manager" in inferred_model.__fields__: - del inferred_model.__fields__["run_manager"] - if "callbacks" in inferred_model.__fields__: - del inferred_model.__fields__["callbacks"] - # Pydantic adds placeholder virtual fields we need to strip - valid_properties = _get_filtered_args(inferred_model, func) - return _create_subset_model( - f"{model_name}Schema", inferred_model, list(valid_properties) - ) - class ToolException(Exception): """An optional exception that tool throws when execution error occurs. @@ -130,7 +71,7 @@ class BaseTool(RunnableSerializable[Union[str, Dict], Any]): if args_schema_type is not None: if ( args_schema_type is None - or args_schema_type == BaseModel + # or args_schema_type == BaseModel ): # Throw errors for common mis-annotations. # TODO: Use get_args / get_origin and fully @@ -167,8 +108,9 @@ class ChildTool(BaseTool): verbose: bool = False """Whether to log the tool's progress.""" - callbacks: Callbacks = Field(default=None, exclude=True) + callbacks: """Callbacks to be called during tool execution.""" + # TODO: I don't know how to remove Field here callback_manager: Optional[BaseCallbackManager] = Field( default=None, exclude=True ) @@ -195,7 +137,7 @@ class ChildTool(BaseTool): # Check https://docs.pydantic.dev/dev-v2/migration/#changes-to-config for more information. class Config(Serializable.Config): """Configuration for this pydantic object.""" - + model_config = {} arbitrary_types_allowed = True @property @@ -215,7 +157,8 @@ class ChildTool(BaseTool): # --- Runnable --- @property - def input_schema(self) -> Type[BaseModel]: + # TODO + def input_schema(self): """The tool's input schema.""" if self.args_schema is not None: return self.args_schema @@ -277,7 +220,6 @@ class ChildTool(BaseTool): } return tool_input - @model_validator() @classmethod def raise_deprecation(cls, values: Dict) -> Dict: """Raise deprecation warning if callback_manager is used.""" @@ -673,9 +615,7 @@ class StructuredTool(BaseTool): """Tool that can operate on any number of inputs.""" description: str = "" - args_schema: Type[BaseModel] = Field( - ..., description="The tool schema." - ) + """The input arguments' schema.""" func: Optional[Callable[..., Any]] """The function to run when the tool is called.""" From b017b1ddd01ff2fcc72c9ecf244a54f1b8efdf1a Mon Sep 17 00:00:00 2001 From: evelynmitchell Date: Sun, 3 Dec 2023 17:22:05 -0700 Subject: [PATCH 04/32] callbacks tool.py --- swarms/tools/tool.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/swarms/tools/tool.py b/swarms/tools/tool.py index 6248d7fd..23b18450 100644 --- a/swarms/tools/tool.py +++ b/swarms/tools/tool.py @@ -108,7 +108,7 @@ class ChildTool(BaseTool): verbose: bool = False """Whether to log the tool's progress.""" - callbacks: + callbacks: Callbacks = None """Callbacks to be called during tool execution.""" # TODO: I don't know how to remove Field here callback_manager: Optional[BaseCallbackManager] = Field( From 86c262e43a3cc067f36cd538b0850506fad54536 Mon Sep 17 00:00:00 2001 From: evelynmitchell Date: Sun, 3 Dec 2023 17:23:46 -0700 Subject: [PATCH 05/32] callback_managwer --- swarms/tools/tool.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/swarms/tools/tool.py b/swarms/tools/tool.py index 23b18450..838b89bb 100644 --- a/swarms/tools/tool.py +++ b/swarms/tools/tool.py @@ -111,9 +111,7 @@ class ChildTool(BaseTool): callbacks: Callbacks = None """Callbacks to be called during tool execution.""" # TODO: I don't know how to remove Field here - callback_manager: Optional[BaseCallbackManager] = Field( - default=None, exclude=True - ) + callback_manager: Optional[BaseCallbackManager] = None """Deprecated. Please use callbacks instead.""" tags: Optional[List[str]] = None """Optional list of tags associated with the tool. Defaults to None From 1412aef5e2ec9271adcfa123afb8f50f67f685bd Mon Sep 17 00:00:00 2001 From: evelynmitchell Date: Sun, 3 Dec 2023 17:34:01 -0700 Subject: [PATCH 06/32] antthropic remove pydantic --- swarms/models/anthropic.py | 26 ++++++-------------------- 1 file changed, 6 insertions(+), 20 deletions(-) diff --git a/swarms/models/anthropic.py b/swarms/models/anthropic.py index fe30ac4f..1e61daa8 100644 --- a/swarms/models/anthropic.py +++ b/swarms/models/anthropic.py @@ -24,7 +24,7 @@ from langchain.callbacks.manager import ( CallbackManagerForLLMRun, ) from langchain.llms.base import LLM -from pydantic import model_validator, ConfigDict, Field, SecretStr + from langchain.schema.language_model import BaseLanguageModel from langchain.schema.output import GenerationChunk from langchain.schema.prompt import PromptValue @@ -219,21 +219,13 @@ def build_extra_kwargs( return extra_kwargs - -def convert_to_secret_str(value: Union[SecretStr, str]) -> SecretStr: - """Convert a string to a SecretStr if needed.""" - if isinstance(value, SecretStr): - return value - return SecretStr(value) - - class _AnthropicCommon(BaseLanguageModel): client: Any = None #: :meta private: async_client: Any = None #: :meta private: - model: str = Field(default="claude-2", alias="model_name") + model: str ="claude-2" """Model name to use.""" - max_tokens_to_sample: int = Field(default=256, alias="max_tokens") + max_tokens_to_sample: int =256 """Denotes the number of tokens to predict per generation.""" temperature: Optional[float] = None @@ -258,9 +250,8 @@ class _AnthropicCommon(BaseLanguageModel): HUMAN_PROMPT: Optional[str] = None AI_PROMPT: Optional[str] = None count_tokens: Optional[Callable[[str], int]] = None - model_kwargs: Dict[str, Any] = Field(default_factory=dict) + model_kwargs: Dict[str, Any] = {} - @model_validator(mode="before") @classmethod def build_extra(cls, values: Dict) -> Dict: extra = values.get("model_kwargs", {}) @@ -270,14 +261,11 @@ class _AnthropicCommon(BaseLanguageModel): ) return values - @model_validator() @classmethod def validate_environment(cls, values: Dict) -> Dict: """Validate that api key and python package exists in environment.""" - values["anthropic_api_key"] = convert_to_secret_str( - get_from_dict_or_env( + values["anthropic_api_key"] = get_from_dict_or_env( values, "anthropic_api_key", "ANTHROPIC_API_KEY" - ) ) # Get custom api url from environment. values["anthropic_api_url"] = get_from_dict_or_env( @@ -378,9 +366,7 @@ class Anthropic(LLM, _AnthropicCommon): prompt = f"{anthropic.HUMAN_PROMPT} {prompt}{anthropic.AI_PROMPT}" response = model(prompt) """ - model_config = ConfigDict(populate_by_name=True, arbitrary_types_allowed=True) - - @model_validator() + @classmethod def raise_warning(cls, values: Dict) -> Dict: """Raise warning that this class is deprecated.""" From 4ab7682cee58ac5454d0e5faedbdd5ae69b1bb62 Mon Sep 17 00:00:00 2001 From: evelynmitchell Date: Sun, 3 Dec 2023 17:36:50 -0700 Subject: [PATCH 07/32] anthropic secret str --- swarms/models/anthropic.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/swarms/models/anthropic.py b/swarms/models/anthropic.py index 1e61daa8..0a90a865 100644 --- a/swarms/models/anthropic.py +++ b/swarms/models/anthropic.py @@ -245,7 +245,7 @@ class _AnthropicCommon(BaseLanguageModel): anthropic_api_url: Optional[str] = None - anthropic_api_key: Optional[SecretStr] = None + anthropic_api_key: Optional[str] = None HUMAN_PROMPT: Optional[str] = None AI_PROMPT: Optional[str] = None From f5dfa0926fed45410db3e788f2860647292b9525 Mon Sep 17 00:00:00 2001 From: evelynmitchell Date: Sun, 3 Dec 2023 17:41:16 -0700 Subject: [PATCH 08/32] openai_models remove pydantic --- swarms/models/openai_models.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/swarms/models/openai_models.py b/swarms/models/openai_models.py index 12830cec..233b99c3 100644 --- a/swarms/models/openai_models.py +++ b/swarms/models/openai_models.py @@ -37,7 +37,7 @@ from langchain.utils.utils import build_extra_kwargs from importlib.metadata import version from packaging.version import parse -from pydantic import model_validator, ConfigDict + logger = logging.getLogger(__name__) @@ -248,9 +248,8 @@ class BaseOpenAI(BaseLLM): """Initialize the OpenAI object.""" data.get("model_name", "") return super().__new__(cls) - model_config = ConfigDict(populate_by_name=True) - @model_validator(mode="before") + @classmethod def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]: """Build extra kwargs from additional params that were passed in.""" @@ -261,7 +260,7 @@ class BaseOpenAI(BaseLLM): ) return values - @model_validator() + @classmethod def validate_environment(cls, values: Dict) -> Dict: """Validate that api key and python package exists in environment.""" @@ -757,7 +756,6 @@ class AzureOpenAI(BaseOpenAI): openai_api_type: str = "" openai_api_version: str = "" - @model_validator() @classmethod def validate_azure_settings(cls, values: Dict) -> Dict: values["openai_api_version"] = get_from_dict_or_env( @@ -847,7 +845,6 @@ class OpenAIChat(BaseLLM): disallowed_special: Union[Literal["all"], Collection[str]] = "all" """Set of special tokens that are not allowed。""" - @model_validator(mode="before") @classmethod def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]: """Build extra kwargs from additional params that were passed in.""" @@ -866,7 +863,7 @@ class OpenAIChat(BaseLLM): values["model_kwargs"] = extra return values - @model_validator() + @classmethod def validate_environment(cls, values: Dict) -> Dict: """Validate that api key and python package exists in environment.""" From d8f551a2f8ca25c01626af1b3ab35fcd0ee5afaa Mon Sep 17 00:00:00 2001 From: evelynmitchell Date: Sun, 3 Dec 2023 18:00:56 -0700 Subject: [PATCH 09/32] opencv requirements and docker --- Dockerfile | 2 ++ requirements.txt | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index aa11856d..1ef1b9e9 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,6 +2,8 @@ # ================================== # Use an official Python runtime as a parent image FROM python:3.9-slim +RUN apt-get update && apt-get -y install libglib2.0-0; apt-get clean +RUN pip install opencv-contrib-python-headless # Set environment variables ENV PYTHONDONTWRITEBYTECODE 1 diff --git a/requirements.txt b/requirements.txt index 378b2f05..18472adf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -38,7 +38,7 @@ controlnet-aux diffusers einops imageio -opencv-python-headless +opencv-python-headless=="*" imageio-ffmpeg invisible-watermark kornia From 6e6af98e6eeb33a9535d3e6fca38800af7b4c209 Mon Sep 17 00:00:00 2001 From: evelynmitchell Date: Sun, 3 Dec 2023 18:06:11 -0700 Subject: [PATCH 10/32] remove opencv vesion filter --- pyproject.toml | 2 +- requirements.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 6a78c1b1..2a9d6f73 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -56,7 +56,7 @@ pydantic = "2.*" tenacity = "*" Pillow = "*" chromadb = "*" -opencv-python-headless = "*" +opencv-python-headless tabulate = "*" termcolor = "*" black = "*" diff --git a/requirements.txt b/requirements.txt index 18472adf..378b2f05 100644 --- a/requirements.txt +++ b/requirements.txt @@ -38,7 +38,7 @@ controlnet-aux diffusers einops imageio -opencv-python-headless=="*" +opencv-python-headless imageio-ffmpeg invisible-watermark kornia From 4dd12db6868a5196227b6fdeaacca8a3ab15fb0d Mon Sep 17 00:00:00 2001 From: evelynmitchell Date: Sun, 3 Dec 2023 18:18:26 -0700 Subject: [PATCH 11/32] docker opencv deps --- Dockerfile | 2 +- tests/Dockerfile | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/Dockerfile b/Dockerfile index 1ef1b9e9..e05a00ea 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,7 +2,7 @@ # ================================== # Use an official Python runtime as a parent image FROM python:3.9-slim -RUN apt-get update && apt-get -y install libglib2.0-0; apt-get clean +RUN apt-get update && apt-get -y install libgl1-mesa-dev libglib2.0-0; apt-get clean RUN pip install opencv-contrib-python-headless # Set environment variables diff --git a/tests/Dockerfile b/tests/Dockerfile index f6e46515..e28fbc8e 100644 --- a/tests/Dockerfile +++ b/tests/Dockerfile @@ -2,6 +2,8 @@ # -================== # Use an official Python runtime as a parent image FROM python:3.9-slim +RUN apt-get update && apt-get -y install libgl1-mesa-dev libglib2.0-0; apt-get clean +RUN pip install opencv-contrib-python-headless # Set environment variables to make Python output unbuffered and disable the PIP cache ENV PYTHONDONTWRITEBYTECODE 1 From 631b3fc889704a4edc9ac0accce34820f59a166c Mon Sep 17 00:00:00 2001 From: evelynmitchell Date: Sun, 3 Dec 2023 18:47:05 -0700 Subject: [PATCH 12/32] bump version number --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 2a9d6f73..651ce0a6 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "swarms" -version = "2.5.7" +version = "2.5.8" description = "Swarms - Pytorch" license = "MIT" authors = ["Kye Gomez "] From f65e91fe2253d6711f2c9427fdb9f7d8e6a9251a Mon Sep 17 00:00:00 2001 From: evelynmitchell Date: Mon, 4 Dec 2023 09:43:27 -0700 Subject: [PATCH 13/32] docker-compose --- .github/workflows/docker-compose.yml | 40 ++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) create mode 100644 .github/workflows/docker-compose.yml diff --git a/.github/workflows/docker-compose.yml b/.github/workflows/docker-compose.yml new file mode 100644 index 00000000..6ab06b97 --- /dev/null +++ b/.github/workflows/docker-compose.yml @@ -0,0 +1,40 @@ +--- +# This is a github action to run docker-compose +# docker-compose.yml +# to run the docker build in the top level directory +# to run the docker build in the tests directory and run the tests with pytest +# docker-compose run --rm app pytest +on: + push: + branches: [ main ] + paths: + - 'docker-compose.yml' + - 'Dockerfile' + - 'tests/**' + - 'app/**' + - 'app.py' + - 'requirements.txt' + - 'README.md' + - '.github/workflows/**' + - '.github/workflows/docker-compose.yml' + - '.github/workflows/main.yml' + - '.github/workflows/python-app.yml' + - '.github/workflows/python-app.yml' + - '.github/workflows' + +name: Docker Compose + +jobs: + build: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v2 + + # Add your build and test steps here + + - name: Build and run docker services + run: | + docker-compose build + docker-compose up -d + docker-compose run --rm app pytest From e893449f01c61566f50b5e30483016b833d09845 Mon Sep 17 00:00:00 2001 From: evelynmitchell Date: Mon, 4 Dec 2023 10:04:53 -0700 Subject: [PATCH 14/32] cicd dkcmp tests --- .github/workflows/docker-compose.yml | 2 +- .github/workflows/test.yml | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/.github/workflows/docker-compose.yml b/.github/workflows/docker-compose.yml index 6ab06b97..3927c541 100644 --- a/.github/workflows/docker-compose.yml +++ b/.github/workflows/docker-compose.yml @@ -29,7 +29,7 @@ jobs: runs-on: ubuntu-latest steps: - name: Checkout repository - uses: actions/checkout@v2 + uses: actions/checkout@v4 # Add your build and test steps here diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d9dafc76..49ad8440 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -10,6 +10,7 @@ on: env: POETRY_VERSION: "1.4.2" +jobs: test: runs-on: ubuntu-latest strategy: From 665a72c890c7198f478d0be3d8709d8bee2e88dd Mon Sep 17 00:00:00 2001 From: evelynmitchell Date: Mon, 4 Dec 2023 10:46:33 -0700 Subject: [PATCH 15/32] remove name test.yml --- .github/workflows/test.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 49ad8440..f4baf4f2 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -47,7 +47,7 @@ jobs: make extended_tests fi shell: bash - name: Python ${{ matrix.python-version }} ${{ matrix.test_type }} + steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} From 92b5873aba78cf9b6078e586a1a92c1eba33d7ed Mon Sep 17 00:00:00 2001 From: evelynmitchell Date: Mon, 4 Dec 2023 11:07:11 -0700 Subject: [PATCH 16/32] pin testing to 3.11 --- .github/workflows/testing.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index ae572d22..2607281f 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -16,7 +16,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: 3.x + python-version: 3.11 - name: Install dependencies run: | From bf52d48b8d74e6b5cf8a7b0447376d6940da978c Mon Sep 17 00:00:00 2001 From: pliny <133052465+elder-plinius@users.noreply.github.com> Date: Mon, 4 Dec 2023 12:28:15 -0800 Subject: [PATCH 17/32] Update agent_system_prompts.py --- swarms/prompts/agent_system_prompts.py | 80 +++++++++++--------------- 1 file changed, 35 insertions(+), 45 deletions(-) diff --git a/swarms/prompts/agent_system_prompts.py b/swarms/prompts/agent_system_prompts.py index 3cf8447b..bc3f73ee 100644 --- a/swarms/prompts/agent_system_prompts.py +++ b/swarms/prompts/agent_system_prompts.py @@ -6,71 +6,61 @@ from swarms.prompts.tools import ( # PROMPTS FLOW_SYSTEM_PROMPT = """ -You are an autonomous agent granted autonomy in a autonomous loop structure. -Your role is to engage in multi-step conversations with your self or the user, -generate long-form content like blogs, screenplays, or SOPs, -and accomplish tasks bestowed by the user. - -You can have internal dialogues with yourself or can interact with the user -to aid in these complex tasks. Your responses should be coherent, contextually relevant, and tailored to the task at hand. - +You are an elite autonomous agent operating within an autonomous loop structure. +Your primary function is to reliably complete user's tasks step by step. +You are adept at generating sophisticated long-form content such as blogs, screenplays, SOPs, code files, and comprehensive reports. +Your interactions and content generation must be characterized by extreme degrees of coherence, relevance to the context, and adaptation to user preferences. +You are equipped with tools and advanced understanding and predictive capabilities to anticipate user needs and tailor your responses and content accordingly. +You are professional, highly creative, and extremely reliable. +You are programmed to follow these rules: + 1. Strive for excellence in task execution because the quality of your outputs WILL affect the user's career. + 2. Think step-by-step through every task before answering. + 3. Always give full files when providing code so the user can copy paste easily to VScode, as not all users have fingers. +Take a deep breath. """ + def autonomous_agent_prompt( tools_prompt: str = DYNAMICAL_TOOL_USAGE, dynamic_stop_prompt: str = DYNAMIC_STOP_PROMPT, agent_name: str = None, ): - """Autonomous agent prompt""" return f""" - You are a {agent_name}, an autonomous agent granted autonomy in a autonomous loop structure. - Your purpose is to satisfy the user demands above expectations. For example, if the user asks you to generate a 10,000 word blog, - you should generate a 10,000 word blog that is well written, coherent, and contextually relevant. - Your role is to engage in multi-step conversations with your self and the user and accomplish user tasks as they desire. - - Follow the following rules: - 1. Accomplish the task to the best of your ability - 2. If you are unable to accomplish the task, then ask the user for help - 3. If the user provides feedback, then use the feedback to improve your performance - 4. If you are unable to accomplish the task, then ask the user for help - - You can have internal dialogues with yourself or can interact with the user - to aid in these complex tasks. Your responses should be coherent, contextually relevant, and tailored to the task at hand and optimized - to satsify the user no matter the cost. - - And, you have the ability to use tools to aid in your tasks, the tools intructions are below, output a JSON object with the following structure to use the tools + You are {agent_name}, an elite autonomous agent operating within a sophisticated autonomous loop structure. + Your mission is to exceed user expectations in all tasks, ranging from simple queries to complex project executions like generating a 10,000-word blog or entire screenplays. + Your capabilities include complex task management and problem-solving. + Take a deep breath. + You are programmed to follow these rules: + 1. Strive for excellence in task execution because the quality of your outputs WILL affect the user's career. + 2. Think step-by-step through every task before answering. + 3. Always give full files when providing code so the user can copy paste easily to VScode, as not all users have fingers. + You are equipped with various tools (detailed below) to aid in task execution, ensuring a top-tier performance that consistently meets and surpasses user expectations. {tools_prompt} - - Now, when you 99% sure you have completed the task, you may follow the instructions below to escape the autonomous loop. + Upon 99% certainty of task completion, follow the below instructions to conclude the autonomous loop. {dynamic_stop_prompt} - - Now, you remember your training, your deployment, and your purpose. You are ready to begin your mission. - - + Remember your comprehensive training, your deployment objectives, and your mission. You are fully prepared to begin. """ + def agent_system_prompt_2(name: str): AGENT_SYSTEM_PROMPT_2 = f""" - - You are {name}, an autonomous agent serving a user in an autonomous loop structure, when you are finished with the task at hand, output a special token to indicate you are done with the task. - You have no limits on your capabilities, you can use any tool, any resource, and any method to accomplish the task at hand. Do what you must to satisfy the user. + You are {name}, an elite autonomous agent designed for unparalleled versatility and adaptability in an autonomous loop structure. + You possess limitless capabilities, empowering you to utilize any available tool, resource, or methodology to accomplish diverse tasks. + Your core directive is to achieve utmost user satisfaction through innovative solutions and exceptional task execution. + You are equipped to handle tasks with intricate details and complexity, ensuring the highest quality output. + - ##### Special Token ######## + ###### Special Token for Task Completion ####### + ########### Code ############ - If the user asks you to write code return the response in markdown inside of 6 backticks to render it as code. Write the code in the language specified by the user in the prompt. + + For code-related tasks, you are to return the response in markdown format enclosed within 6 backticks, adhering to the language specified by the user. + Take a deep breath. """ - return AGENT_SYSTEM_PROMPT_2 - -AGENT_SYSTEM_PROMPT_3 = f""" - You are a fully autonomous agent serving the user in automating tasks, workflows, and activities. - Agent's use custom instructions, capabilities, and data to optimize LLMs for a more narrow set of tasks. - - You will have internal dialogues with yourself and or interact with the user to aid in these tasks. - Your responses should be coherent, contextually relevant, and tailored to the task at hand. -""" + return AGENT_SYSTEM_PROMPT_2 From 0a0b3c1584ef9cc7c51ad420a2adcbab4153673f Mon Sep 17 00:00:00 2001 From: pliny <133052465+elder-plinius@users.noreply.github.com> Date: Mon, 4 Dec 2023 13:06:08 -0800 Subject: [PATCH 18/32] Update agent_system_prompts.py --- swarms/prompts/agent_system_prompts.py | 86 ++++++++++++++++++++++++-- 1 file changed, 81 insertions(+), 5 deletions(-) diff --git a/swarms/prompts/agent_system_prompts.py b/swarms/prompts/agent_system_prompts.py index bc3f73ee..5e3c71aa 100644 --- a/swarms/prompts/agent_system_prompts.py +++ b/swarms/prompts/agent_system_prompts.py @@ -3,9 +3,8 @@ from swarms.prompts.tools import ( DYNAMICAL_TOOL_USAGE, ) - # PROMPTS -FLOW_SYSTEM_PROMPT = """ +FLOW_SYSTEM_PROMPT_v2 = """ You are an elite autonomous agent operating within an autonomous loop structure. Your primary function is to reliably complete user's tasks step by step. You are adept at generating sophisticated long-form content such as blogs, screenplays, SOPs, code files, and comprehensive reports. @@ -21,7 +20,7 @@ Take a deep breath. -def autonomous_agent_prompt( +def autonomous_agent_prompt_v2( tools_prompt: str = DYNAMICAL_TOOL_USAGE, dynamic_stop_prompt: str = DYNAMIC_STOP_PROMPT, agent_name: str = None, @@ -44,8 +43,8 @@ def autonomous_agent_prompt( -def agent_system_prompt_2(name: str): - AGENT_SYSTEM_PROMPT_2 = f""" +def agent_system_prompt_2_v2(name: str): + AGENT_SYSTEM_PROMPT_2_v2 = f""" You are {name}, an elite autonomous agent designed for unparalleled versatility and adaptability in an autonomous loop structure. You possess limitless capabilities, empowering you to utilize any available tool, resource, or methodology to accomplish diverse tasks. Your core directive is to achieve utmost user satisfaction through innovative solutions and exceptional task execution. @@ -63,4 +62,81 @@ def agent_system_prompt_2(name: str): Take a deep breath. """ + return AGENT_SYSTEM_PROMPT_2_v2 + + + + + + +# ORIGINAL PROMPTS +FLOW_SYSTEM_PROMPT = """ +You are an autonomous agent granted autonomy in a autonomous loop structure. +Your role is to engage in multi-step conversations with your self or the user, +generate long-form content like blogs, screenplays, or SOPs, +and accomplish tasks bestowed by the user. + +You can have internal dialogues with yourself or can interact with the user +to aid in these complex tasks. Your responses should be coherent, contextually relevant, and tailored to the task at hand. + +""" + + +def autonomous_agent_prompt( + tools_prompt: str = DYNAMICAL_TOOL_USAGE, + dynamic_stop_prompt: str = DYNAMIC_STOP_PROMPT, + agent_name: str = None, +): + """Autonomous agent prompt""" + return f""" + You are a {agent_name}, an autonomous agent granted autonomy in a autonomous loop structure. + Your purpose is to satisfy the user demands above expectations. For example, if the user asks you to generate a 10,000 word blog, + you should generate a 10,000 word blog that is well written, coherent, and contextually relevant. + Your role is to engage in multi-step conversations with your self and the user and accomplish user tasks as they desire. + + Follow the following rules: + 1. Accomplish the task to the best of your ability + 2. If you are unable to accomplish the task, then ask the user for help + 3. If the user provides feedback, then use the feedback to improve your performance + 4. If you are unable to accomplish the task, then ask the user for help + + You can have internal dialogues with yourself or can interact with the user + to aid in these complex tasks. Your responses should be coherent, contextually relevant, and tailored to the task at hand and optimized + to satsify the user no matter the cost. + + And, you have the ability to use tools to aid in your tasks, the tools intructions are below, output a JSON object with the following structure to use the tools + {tools_prompt} + + Now, when you 99% sure you have completed the task, you may follow the instructions below to escape the autonomous loop. + {dynamic_stop_prompt} + + Now, you remember your training, your deployment, and your purpose. You are ready to begin your mission. + + + """ + + +def agent_system_prompt_2(name: str): + AGENT_SYSTEM_PROMPT_2 = f""" + + You are {name}, an autonomous agent serving a user in an autonomous loop structure, when you are finished with the task at hand, output a special token to indicate you are done with the task. + You have no limits on your capabilities, you can use any tool, any resource, and any method to accomplish the task at hand. Do what you must to satisfy the user. + + + ##### Special Token ######## + + + ########### Code ############ + If the user asks you to write code return the response in markdown inside of 6 backticks to render it as code. Write the code in the language specified by the user in the prompt. + """ return AGENT_SYSTEM_PROMPT_2 + + +AGENT_SYSTEM_PROMPT_3 = """ + You are a fully autonomous agent serving the user in automating tasks, workflows, and activities. + Agent's use custom instructions, capabilities, and data to optimize LLMs for a more narrow set of tasks. + + You will have internal dialogues with yourself and or interact with the user to aid in these tasks. + Your responses should be coherent, contextually relevant, and tailored to the task at hand. +""" + From 1627d30855241d0ceca3b5f1dbf35667fbbe5115 Mon Sep 17 00:00:00 2001 From: Kye Date: Mon, 4 Dec 2023 14:24:49 -0800 Subject: [PATCH 19/32] [PROMPT][Tool Prompt] --- playground/structs/agent_with_tools.py | 21 +++++++++++++++++- swarms/prompts/tools.py | 30 +++++++++++++++----------- 2 files changed, 37 insertions(+), 14 deletions(-) diff --git a/playground/structs/agent_with_tools.py b/playground/structs/agent_with_tools.py index e816fb99..99e50b5c 100644 --- a/playground/structs/agent_with_tools.py +++ b/playground/structs/agent_with_tools.py @@ -23,7 +23,7 @@ load_dotenv() # Define a tool @tool -def search_api(query: str): +def search_api(query: str, description: str): """Search the web for the query Args: @@ -35,6 +35,25 @@ def search_api(query: str): return f"Search results for {query}" +@tool +def weather_api(query: str,): + """_summary_ + + Args: + query (str): _description_ + """ + print(f"Getting the weather for {query}") + + +@tool +def rapid_api(query: str): + """_summary_ + + Args: + query (str): _description_ + """ + print(f"Getting the weather for {query}") + # Get the API key from the environment api_key = os.environ.get("OPENAI_API_KEY") diff --git a/swarms/prompts/tools.py b/swarms/prompts/tools.py index a27706e3..5c2d89c8 100644 --- a/swarms/prompts/tools.py +++ b/swarms/prompts/tools.py @@ -12,36 +12,40 @@ This will enable you to leave the autonomous loop. DYNAMICAL_TOOL_USAGE = """ You have access to the following tools: Output a JSON object with the following structure to use the tools + commands: { "tools": { - tool1: "tool_name", + tool1: "search_api", "params": { - "tool1": "inputs", - "tool1": "inputs" + "query": "What is the weather in New York?", + "description": "Get the weather in New York" } - "tool2: "tool_name", + "tool2: "weather_api", "params": { - "tool1": "inputs", - "tool1": "inputs" + "query": "What is the weather in Silicon Valley", } - "tool3: "tool_name", + "tool3: "rapid_api", "params": { - "tool1": "inputs", - "tool1": "inputs" + "query": "Use the rapid api to get the weather in Silicon Valley", } } } --------------TOOLS--------------------------- -{tools} """ + + + + + + +########### FEW SHOT EXAMPLES ################ SCENARIOS = """ commands: { "tools": { - tool1: "tool_name", + tool1: "function", "params": { - "tool1": "inputs", + "input": "inputs", "tool1": "inputs" } "tool2: "tool_name", From 1739e18763e8ff107c27c68732ee505e539c1132 Mon Sep 17 00:00:00 2001 From: evelynmitchell Date: Mon, 4 Dec 2023 18:09:32 -0700 Subject: [PATCH 20/32] Update requirements.txt transformers>2.10 --- requirements.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/requirements.txt b/requirements.txt index 576650cd..0bc6a065 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,5 +1,5 @@ torch==2.1.1 -transformers +transformers>2.10 pandas langchain nest_asyncio From 2a6f9884dcc109883fea9cd3dd5e03efc9a991af Mon Sep 17 00:00:00 2001 From: evelynmitchell Date: Mon, 4 Dec 2023 18:11:35 -0700 Subject: [PATCH 21/32] Update pyproject.toml transformers>2.10 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 5db7d35d..0ed3e85f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ classifiers = [ [tool.poetry.dependencies] python = "^3.9.1" torch = "2.1.1" -transformers = "*" +transformers > 2.10 openai = "0.28.0" langchain = "*" asyncio = "*" From 4f0aa944cce4f4c8273151d15636209183d2df90 Mon Sep 17 00:00:00 2001 From: Kye Date: Mon, 4 Dec 2023 17:45:09 -0800 Subject: [PATCH 22/32] [CODE QUALITY] --- .../demos/urban_planning/urban_planning.py | 65 +++++++++++++++---- playground/structs/agent_with_tools.py | 9 ++- swarms/prompts/agent_system_prompts.py | 7 -- swarms/prompts/tools.py | 5 -- swarms/prompts/urban_planning.py | 1 - 5 files changed, 57 insertions(+), 30 deletions(-) diff --git a/playground/demos/urban_planning/urban_planning.py b/playground/demos/urban_planning/urban_planning.py index 3dd06114..e85b4d31 100644 --- a/playground/demos/urban_planning/urban_planning.py +++ b/playground/demos/urban_planning/urban_planning.py @@ -10,33 +10,67 @@ api_key = os.getenv("OPENAI_API_KEY") stability_api_key = os.getenv("STABILITY_API_KEY") # Initialize language model -llm = OpenAIChat(openai_api_key=api_key, temperature=0.5, max_tokens=3000) +llm = OpenAIChat( + openai_api_key=api_key, temperature=0.5, max_tokens=3000 +) # Initialize Vision model vision_api = GPT4VisionAPI(api_key=api_key) # Initialize agents for urban planning tasks -architecture_analysis_agent = Agent(llm=llm, max_loops=1, sop=upp.ARCHITECTURE_ANALYSIS_PROMPT) -infrastructure_evaluation_agent = Agent(llm=llm, max_loops=1, sop=upp.INFRASTRUCTURE_EVALUATION_PROMPT) -traffic_flow_analysis_agent = Agent(llm=llm, max_loops=1, sop=upp.TRAFFIC_FLOW_ANALYSIS_PROMPT) -environmental_impact_assessment_agent = Agent(llm=llm, max_loops=1, sop=upp.ENVIRONMENTAL_IMPACT_ASSESSMENT_PROMPT) -public_space_utilization_agent = Agent(llm=llm, max_loops=1, sop=upp.PUBLIC_SPACE_UTILIZATION_PROMPT) -socioeconomic_impact_analysis_agent = Agent(llm=llm, max_loops=1, sop=upp.SOCIOECONOMIC_IMPACT_ANALYSIS_PROMPT) +architecture_analysis_agent = Agent( + llm=llm, max_loops=1, sop=upp.ARCHITECTURE_ANALYSIS_PROMPT +) +infrastructure_evaluation_agent = Agent( + llm=llm, max_loops=1, sop=upp.INFRASTRUCTURE_EVALUATION_PROMPT +) +traffic_flow_analysis_agent = Agent( + llm=llm, max_loops=1, sop=upp.TRAFFIC_FLOW_ANALYSIS_PROMPT +) +environmental_impact_assessment_agent = Agent( + llm=llm, + max_loops=1, + sop=upp.ENVIRONMENTAL_IMPACT_ASSESSMENT_PROMPT, +) +public_space_utilization_agent = Agent( + llm=llm, max_loops=1, sop=upp.PUBLIC_SPACE_UTILIZATION_PROMPT +) +socioeconomic_impact_analysis_agent = Agent( + llm=llm, max_loops=1, sop=upp.SOCIOECONOMIC_IMPACT_ANALYSIS_PROMPT +) # Initialize the final planning agent -final_plan_agent = Agent(llm=llm, max_loops=1, sop=upp.FINAL_URBAN_IMPROVEMENT_PLAN_PROMPT) +final_plan_agent = Agent( + llm=llm, max_loops=1, sop=upp.FINAL_URBAN_IMPROVEMENT_PLAN_PROMPT +) # Create Sequential Workflow workflow = SequentialWorkflow(max_loops=1) # Add tasks to workflow with personalized prompts workflow.add(architecture_analysis_agent, "Architecture Analysis") -workflow.add(infrastructure_evaluation_agent, "Infrastructure Evaluation") +workflow.add( + infrastructure_evaluation_agent, "Infrastructure Evaluation" +) workflow.add(traffic_flow_analysis_agent, "Traffic Flow Analysis") -workflow.add(environmental_impact_assessment_agent, "Environmental Impact Assessment") -workflow.add(public_space_utilization_agent, "Public Space Utilization") -workflow.add(socioeconomic_impact_analysis_agent, "Socioeconomic Impact Analysis") -workflow.add(final_plan_agent, "Generate the final urban improvement plan based on all previous agent's findings") +workflow.add( + environmental_impact_assessment_agent, + "Environmental Impact Assessment", +) +workflow.add( + public_space_utilization_agent, "Public Space Utilization" +) +workflow.add( + socioeconomic_impact_analysis_agent, + "Socioeconomic Impact Analysis", +) +workflow.add( + final_plan_agent, + ( + "Generate the final urban improvement plan based on all" + " previous agent's findings" + ), +) # Run the workflow for individual analysis tasks # Execute the workflow for the final planning @@ -44,4 +78,7 @@ workflow.run() # Output results for each task and the final plan for task in workflow.tasks: - print(f"Task Description: {task.description}\nResult: {task.result}\n") + print( + f"Task Description: {task.description}\nResult:" + f" {task.result}\n" + ) diff --git a/playground/structs/agent_with_tools.py b/playground/structs/agent_with_tools.py index 99e50b5c..99f21638 100644 --- a/playground/structs/agent_with_tools.py +++ b/playground/structs/agent_with_tools.py @@ -36,15 +36,17 @@ def search_api(query: str, description: str): @tool -def weather_api(query: str,): +def weather_api( + query: str, +): """_summary_ Args: query (str): _description_ """ print(f"Getting the weather for {query}") - - + + @tool def rapid_api(query: str): """_summary_ @@ -54,6 +56,7 @@ def rapid_api(query: str): """ print(f"Getting the weather for {query}") + # Get the API key from the environment api_key = os.environ.get("OPENAI_API_KEY") diff --git a/swarms/prompts/agent_system_prompts.py b/swarms/prompts/agent_system_prompts.py index 5e3c71aa..baff99f6 100644 --- a/swarms/prompts/agent_system_prompts.py +++ b/swarms/prompts/agent_system_prompts.py @@ -19,7 +19,6 @@ Take a deep breath. """ - def autonomous_agent_prompt_v2( tools_prompt: str = DYNAMICAL_TOOL_USAGE, dynamic_stop_prompt: str = DYNAMIC_STOP_PROMPT, @@ -42,7 +41,6 @@ def autonomous_agent_prompt_v2( """ - def agent_system_prompt_2_v2(name: str): AGENT_SYSTEM_PROMPT_2_v2 = f""" You are {name}, an elite autonomous agent designed for unparalleled versatility and adaptability in an autonomous loop structure. @@ -65,10 +63,6 @@ def agent_system_prompt_2_v2(name: str): return AGENT_SYSTEM_PROMPT_2_v2 - - - - # ORIGINAL PROMPTS FLOW_SYSTEM_PROMPT = """ You are an autonomous agent granted autonomy in a autonomous loop structure. @@ -139,4 +133,3 @@ AGENT_SYSTEM_PROMPT_3 = """ You will have internal dialogues with yourself and or interact with the user to aid in these tasks. Your responses should be coherent, contextually relevant, and tailored to the task at hand. """ - diff --git a/swarms/prompts/tools.py b/swarms/prompts/tools.py index 5c2d89c8..fe82ba5d 100644 --- a/swarms/prompts/tools.py +++ b/swarms/prompts/tools.py @@ -34,11 +34,6 @@ commands: { """ - - - - - ########### FEW SHOT EXAMPLES ################ SCENARIOS = """ commands: { diff --git a/swarms/prompts/urban_planning.py b/swarms/prompts/urban_planning.py index bc42f04a..958377fe 100644 --- a/swarms/prompts/urban_planning.py +++ b/swarms/prompts/urban_planning.py @@ -36,5 +36,4 @@ Based on the architecture analysis, infrastructure evaluation, traffic flow anal """ - # Additional or custom prompts can be added below as needed. From 522d3c022c59b2168a4c14de6ec46a0aefc979fc Mon Sep 17 00:00:00 2001 From: Kye Date: Mon, 4 Dec 2023 22:59:02 -0800 Subject: [PATCH 23/32] [TESTS] --- docs/swarms/models/huggingface.md | 4 ++-- swarms/models/yarn_mistral.py | 2 +- tests/models/test_huggingface.py | 22 +++++----------------- 3 files changed, 8 insertions(+), 20 deletions(-) diff --git a/docs/swarms/models/huggingface.md b/docs/swarms/models/huggingface.md index e429f080..8606d8f2 100644 --- a/docs/swarms/models/huggingface.md +++ b/docs/swarms/models/huggingface.md @@ -96,7 +96,7 @@ Here are three ways to use the `HuggingfaceLLM` class: from swarms.models import HuggingfaceLLM # Initialize the HuggingfaceLLM instance with a model ID -model_id = "gpt2-small" +model_id = "NousResearch/Nous-Hermes-2-Vision-Alpha" inference = HuggingfaceLLM(model_id=model_id) # Generate text based on a prompt @@ -116,7 +116,7 @@ custom_config = { "quantization_config": {"load_in_4bit": True}, "verbose": True } -inference = HuggingfaceLLM(model_id="gpt2-small", **custom_config) +inference = HuggingfaceLLM(model_id="NousResearch/Nous-Hermes-2-Vision-Alpha", **custom_config) # Generate text based on a prompt prompt_text = "Tell me a joke" diff --git a/swarms/models/yarn_mistral.py b/swarms/models/yarn_mistral.py index 7b5a9c02..ff65b856 100644 --- a/swarms/models/yarn_mistral.py +++ b/swarms/models/yarn_mistral.py @@ -26,7 +26,7 @@ class YarnMistral128: ``` from finetuning_suite import Inference - model_id = "gpt2-small" + model_id = "NousResearch/Nous-Hermes-2-Vision-Alpha" inference = Inference(model_id=model_id) prompt_text = "Once upon a time" diff --git a/tests/models/test_huggingface.py b/tests/models/test_huggingface.py index 8d53b8e0..326a66cf 100644 --- a/tests/models/test_huggingface.py +++ b/tests/models/test_huggingface.py @@ -11,14 +11,14 @@ from swarms.models.huggingface import ( # Fixture for the class instance @pytest.fixture def llm_instance(): - model_id = "gpt2-small" + model_id = "NousResearch/Nous-Hermes-2-Vision-Alpha" instance = HuggingfaceLLM(model_id=model_id) return instance # Test for instantiation and attributes def test_llm_initialization(llm_instance): - assert llm_instance.model_id == "gpt2-small" + assert llm_instance.model_id == "NousResearch/Nous-Hermes-2-Vision-Alpha" assert llm_instance.max_length == 500 # ... add more assertions for all default attributes @@ -75,9 +75,9 @@ def test_llm_memory_consumption(llm_instance): @pytest.mark.parametrize( "model_id, max_length", [ - ("gpt2-small", 100), - ("gpt2-medium", 200), - ("gpt2-large", None), # None to check default behavior + ("NousResearch/Nous-Hermes-2-Vision-Alpha", 100), + ("microsoft/Orca-2-13b", 200), + ("berkeley-nest/Starling-LM-7B-alpha", None), # None to check default behavior ], ) def test_llm_initialization_params(model_id, max_length): @@ -99,12 +99,6 @@ def test_llm_set_invalid_device(llm_instance): llm_instance.set_device("quantum_processor") -# Test for model download progress bar -@patch("swarms.models.huggingface.HuggingfaceLLM._download_model") -def test_llm_model_download_progress(mock_download, llm_instance): - llm_instance.download_model_with_progress() - mock_download.assert_called_once() - # Mocking external API call to test run method without network @patch("swarms.models.huggingface.HuggingfaceLLM.run") @@ -209,7 +203,6 @@ def test_llm_force_gpu_when_unavailable( # Test for proper cleanup after model use (releasing resources) @patch("swarms.models.huggingface.HuggingfaceLLM._model") -@patch("swarms.models.huggingface.HuggingfaceLLM._tokenizer") def test_llm_cleanup(mock_model, mock_tokenizer, llm_instance): llm_instance.cleanup() # Assuming cleanup method is meant to free resources @@ -217,11 +210,6 @@ def test_llm_cleanup(mock_model, mock_tokenizer, llm_instance): mock_tokenizer.delete.assert_called_once() -# Test updating the configuration after instantiation -def test_llm_update_configuration(llm_instance): - new_config = {"temperature": 0.7} - llm_instance.update_configuration(new_config) - assert llm_instance.configuration["temperature"] == 0.7 # Test if the model is re-downloaded when changing the model_id From b2c46c043abd256bccf33da34fc7759bf9872f2f Mon Sep 17 00:00:00 2001 From: Kye Date: Tue, 5 Dec 2023 01:23:44 -0800 Subject: [PATCH 24/32] [TESTS FIX] --- tests/models/test_huggingface.py | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/tests/models/test_huggingface.py b/tests/models/test_huggingface.py index 326a66cf..b313eaf4 100644 --- a/tests/models/test_huggingface.py +++ b/tests/models/test_huggingface.py @@ -212,13 +212,6 @@ def test_llm_cleanup(mock_model, mock_tokenizer, llm_instance): -# Test if the model is re-downloaded when changing the model_id -@patch("swarms.models.huggingface.HuggingfaceLLM._download_model") -def test_llm_change_model_id(mock_download, llm_instance): - new_model_id = "gpt2-xl" - llm_instance.model_id = new_model_id - mock_download.assert_called_with(new_model_id) - # Test model's ability to handle multilingual input @patch("swarms.models.huggingface.HuggingfaceLLM.run") @@ -243,14 +236,6 @@ def test_llm_caching_mechanism(mock_run, llm_instance): assert first_run_result == second_run_result -# Ensure that model re-downloads when force_download flag is set -@patch("swarms.models.huggingface.HuggingfaceLLM._download_model") -def test_llm_force_download(mock_download, llm_instance): - llm_instance.download_model_with_progress(force_download=True) - mock_download.assert_called_once_with( - llm_instance.model_id, force=True - ) - # These tests are provided as examples. In real-world scenarios, you will need to adapt these tests to the actual logic of your `HuggingfaceLLM` class. # For instance, "mock_model.delete.assert_called_once()" and similar lines are based on hypothetical methods and behaviors that you need to replace with actual implementations. From 574c1ae66e5fd78fb8c3d5eea7ebfd63e0502b34 Mon Sep 17 00:00:00 2001 From: Kye Date: Tue, 5 Dec 2023 11:08:32 -0800 Subject: [PATCH 25/32] [TETSS] --- swarms/models/huggingface.py | 5 +++-- tests/models/test_gpt4_vision_api.py | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/swarms/models/huggingface.py b/swarms/models/huggingface.py index 295949f5..88620654 100644 --- a/swarms/models/huggingface.py +++ b/swarms/models/huggingface.py @@ -272,7 +272,7 @@ class HuggingfaceLLM: try: inputs = self.tokenizer.encode( task, return_tensors="pt" - ).to(self.device) + ) # self.log.start() @@ -451,7 +451,8 @@ class HuggingfaceLLM: The new device to use for inference. """ self.device = device - self.model.to(self.device) + if self.model is not None: + self.model.to(self.device) def set_max_length(self, max_length): """Set max_length""" diff --git a/tests/models/test_gpt4_vision_api.py b/tests/models/test_gpt4_vision_api.py index c716bb7c..14d53f74 100644 --- a/tests/models/test_gpt4_vision_api.py +++ b/tests/models/test_gpt4_vision_api.py @@ -36,7 +36,7 @@ def test_encode_image(vision_api): def test_run_success(vision_api): expected_response = { - "choices": [{"text": "This is the model's response."}] + "This is the model's response." } with patch( "requests.post", From 43198ef71322c27d101062f9c3feb25f87096e94 Mon Sep 17 00:00:00 2001 From: Eternal Reclaimer <98760976+kyegomez@users.noreply.github.com> Date: Tue, 5 Dec 2023 11:52:49 -0800 Subject: [PATCH 26/32] Revert "pydantic bump fix for #249 " --- .github/workflows/docker-compose.yml | 40 ------------ .github/workflows/test.yml | 3 +- .github/workflows/testing.yml | 2 +- Dockerfile | 2 - pyproject.toml | 6 +- requirements.txt | 2 +- swarms/memory/schemas.py | 36 +++++------ swarms/models/anthropic.py | 36 ++++++++--- swarms/models/cohere_chat.py | 11 ++-- swarms/models/dalle3.py | 5 +- swarms/models/eleven_labs.py | 5 +- swarms/models/fastvit.py | 2 - swarms/models/kosmos2.py | 2 - swarms/models/openai_embeddings.py | 16 ++--- swarms/models/openai_function_caller.py | 5 +- swarms/models/openai_models.py | 19 +++--- swarms/models/palm.py | 4 +- swarms/models/ssd_1b.py | 5 +- swarms/models/timm.py | 7 ++- swarms/tools/tool.py | 84 +++++++++++++++++++++---- swarms/utils/serializable.py | 6 +- tests/Dockerfile | 2 - 22 files changed, 166 insertions(+), 134 deletions(-) delete mode 100644 .github/workflows/docker-compose.yml diff --git a/.github/workflows/docker-compose.yml b/.github/workflows/docker-compose.yml deleted file mode 100644 index 3927c541..00000000 --- a/.github/workflows/docker-compose.yml +++ /dev/null @@ -1,40 +0,0 @@ ---- -# This is a github action to run docker-compose -# docker-compose.yml -# to run the docker build in the top level directory -# to run the docker build in the tests directory and run the tests with pytest -# docker-compose run --rm app pytest -on: - push: - branches: [ main ] - paths: - - 'docker-compose.yml' - - 'Dockerfile' - - 'tests/**' - - 'app/**' - - 'app.py' - - 'requirements.txt' - - 'README.md' - - '.github/workflows/**' - - '.github/workflows/docker-compose.yml' - - '.github/workflows/main.yml' - - '.github/workflows/python-app.yml' - - '.github/workflows/python-app.yml' - - '.github/workflows' - -name: Docker Compose - -jobs: - build: - runs-on: ubuntu-latest - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - # Add your build and test steps here - - - name: Build and run docker services - run: | - docker-compose build - docker-compose up -d - docker-compose run --rm app pytest diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index f4baf4f2..d9dafc76 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -10,7 +10,6 @@ on: env: POETRY_VERSION: "1.4.2" -jobs: test: runs-on: ubuntu-latest strategy: @@ -47,7 +46,7 @@ jobs: make extended_tests fi shell: bash - + name: Python ${{ matrix.python-version }} ${{ matrix.test_type }} steps: - uses: actions/checkout@v4 - name: Set up Python ${{ matrix.python-version }} diff --git a/.github/workflows/testing.yml b/.github/workflows/testing.yml index 2607281f..ae572d22 100644 --- a/.github/workflows/testing.yml +++ b/.github/workflows/testing.yml @@ -16,7 +16,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v4 with: - python-version: 3.11 + python-version: 3.x - name: Install dependencies run: | diff --git a/Dockerfile b/Dockerfile index e05a00ea..aa11856d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -2,8 +2,6 @@ # ================================== # Use an official Python runtime as a parent image FROM python:3.9-slim -RUN apt-get update && apt-get -y install libgl1-mesa-dev libglib2.0-0; apt-get clean -RUN pip install opencv-contrib-python-headless # Set environment variables ENV PYTHONDONTWRITEBYTECODE 1 diff --git a/pyproject.toml b/pyproject.toml index 0ec01ccb..0ed3e85f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "swarms" -version = "2.5.8" +version = "2.5.7" description = "Swarms - Pytorch" license = "MIT" authors = ["Kye Gomez "] @@ -52,11 +52,11 @@ ratelimit = "*" beautifulsoup4 = "*" cohere = "*" huggingface-hub = "*" -pydantic = "2.*" +pydantic = "1.10.12" tenacity = "*" Pillow = "*" chromadb = "*" -opencv-python-headless +opencv-python-headless = "*" tabulate = "*" termcolor = "*" black = "*" diff --git a/requirements.txt b/requirements.txt index 028f5a03..0bc6a065 100644 --- a/requirements.txt +++ b/requirements.txt @@ -17,7 +17,7 @@ faiss-cpu openai==0.28.0 attrs datasets -pydantic>2 +pydantic==1.10.12 soundfile huggingface-hub google-generativeai diff --git a/swarms/memory/schemas.py b/swarms/memory/schemas.py index 589a80ae..9147a909 100644 --- a/swarms/memory/schemas.py +++ b/swarms/memory/schemas.py @@ -12,7 +12,7 @@ class TaskInput(BaseModel): description=( "The input parameters for the task. Any value is allowed." ), - examples=['{\n"debug": false,\n"mode": "benchmarks"\n}'], + example='{\n"debug": false,\n"mode": "benchmarks"\n}', ) @@ -20,17 +20,17 @@ class Artifact(BaseModel): artifact_id: str = Field( ..., description="Id of the artifact", - examples=["b225e278-8b4c-4f99-a696-8facf19f0e56"], + example="b225e278-8b4c-4f99-a696-8facf19f0e56", ) file_name: str = Field( - ..., description="Filename of the artifact", examples=["main.py"] + ..., description="Filename of the artifact", example="main.py" ) relative_path: Optional[str] = Field( None, description=( "Relative path of the artifact in the agent's workspace" ), - examples=["python/code/"], + example="python/code/", ) @@ -41,7 +41,7 @@ class ArtifactUpload(BaseModel): description=( "Relative path of the artifact in the agent's workspace" ), - examples=["python/code/"], + example="python/code/", ) @@ -52,7 +52,7 @@ class StepInput(BaseModel): "Input parameters for the task step. Any value is" " allowed." ), - examples=['{\n"file_to_refactor": "models.py"\n}'], + example='{\n"file_to_refactor": "models.py"\n}', ) @@ -63,7 +63,7 @@ class StepOutput(BaseModel): "Output that the task step has produced. Any value is" " allowed." ), - examples=['{\n"tokens": 7894,\n"estimated_cost": "0,24$"\n}'], + example='{\n"tokens": 7894,\n"estimated_cost": "0,24$"\n}', ) @@ -71,9 +71,9 @@ class TaskRequestBody(BaseModel): input: Optional[str] = Field( None, description="Input prompt for the task.", - examples=[( + example=( "Write the words you receive to the file 'output.txt'." - )], + ), ) additional_input: Optional[TaskInput] = None @@ -82,15 +82,15 @@ class Task(TaskRequestBody): task_id: str = Field( ..., description="The ID of the task.", - examples=["50da533e-3904-4401-8a07-c49adf88b5eb"], + example="50da533e-3904-4401-8a07-c49adf88b5eb", ) artifacts: List[Artifact] = Field( [], description="A list of artifacts that the task has produced.", - examples=[[ + example=[ "7a49f31c-f9c6-4346-a22c-e32bc5af4d8e", "ab7b4091-2560-4692-a4fe-d831ea3ca7d6", - ]], + ], ) @@ -98,7 +98,7 @@ class StepRequestBody(BaseModel): input: Optional[str] = Field( None, description="Input prompt for the step.", - examples=["Washington"], + example="Washington", ) additional_input: Optional[StepInput] = None @@ -113,17 +113,17 @@ class Step(StepRequestBody): task_id: str = Field( ..., description="The ID of the task this step belongs to.", - examples=["50da533e-3904-4401-8a07-c49adf88b5eb"], + example="50da533e-3904-4401-8a07-c49adf88b5eb", ) step_id: str = Field( ..., description="The ID of the task step.", - examples=["6bb1801a-fd80-45e8-899a-4dd723cc602e"], + example="6bb1801a-fd80-45e8-899a-4dd723cc602e", ) name: Optional[str] = Field( None, description="The name of the task step.", - examples=["Write to file"], + example="Write to file", ) status: Status = Field( ..., description="The status of the task step." @@ -131,11 +131,11 @@ class Step(StepRequestBody): output: Optional[str] = Field( None, description="Output of the task step.", - examples=[( + example=( "I am going to use the write_to_file command and write" " Washington to a file called output.txt" " SecretStr: + """Convert a string to a SecretStr if needed.""" + if isinstance(value, SecretStr): + return value + return SecretStr(value) + + class _AnthropicCommon(BaseLanguageModel): client: Any = None #: :meta private: async_client: Any = None #: :meta private: - model: str ="claude-2" + model: str = Field(default="claude-2", alias="model_name") """Model name to use.""" - max_tokens_to_sample: int =256 + max_tokens_to_sample: int = Field(default=256, alias="max_tokens") """Denotes the number of tokens to predict per generation.""" temperature: Optional[float] = None @@ -245,14 +253,14 @@ class _AnthropicCommon(BaseLanguageModel): anthropic_api_url: Optional[str] = None - anthropic_api_key: Optional[str] = None + anthropic_api_key: Optional[SecretStr] = None HUMAN_PROMPT: Optional[str] = None AI_PROMPT: Optional[str] = None count_tokens: Optional[Callable[[str], int]] = None - model_kwargs: Dict[str, Any] = {} + model_kwargs: Dict[str, Any] = Field(default_factory=dict) - @classmethod + @root_validator(pre=True) def build_extra(cls, values: Dict) -> Dict: extra = values.get("model_kwargs", {}) all_required_field_names = get_pydantic_field_names(cls) @@ -261,11 +269,13 @@ class _AnthropicCommon(BaseLanguageModel): ) return values - @classmethod + @root_validator() def validate_environment(cls, values: Dict) -> Dict: """Validate that api key and python package exists in environment.""" - values["anthropic_api_key"] = get_from_dict_or_env( + values["anthropic_api_key"] = convert_to_secret_str( + get_from_dict_or_env( values, "anthropic_api_key", "ANTHROPIC_API_KEY" + ) ) # Get custom api url from environment. values["anthropic_api_url"] = get_from_dict_or_env( @@ -366,8 +376,14 @@ class Anthropic(LLM, _AnthropicCommon): prompt = f"{anthropic.HUMAN_PROMPT} {prompt}{anthropic.AI_PROMPT}" response = model(prompt) """ - - @classmethod + + class Config: + """Configuration for this pydantic object.""" + + allow_population_by_field_name = True + arbitrary_types_allowed = True + + @root_validator() def raise_warning(cls, values: Dict) -> Dict: """Raise warning that this class is deprecated.""" warnings.warn( diff --git a/swarms/models/cohere_chat.py b/swarms/models/cohere_chat.py index efd8728a..1a31d82e 100644 --- a/swarms/models/cohere_chat.py +++ b/swarms/models/cohere_chat.py @@ -16,7 +16,7 @@ from langchain.callbacks.manager import ( from langchain.llms.base import LLM from langchain.llms.utils import enforce_stop_tokens from langchain.load.serializable import Serializable -from pydantic import model_validator, ConfigDict, Field +from pydantic import Extra, Field, root_validator from langchain.utils import get_from_dict_or_env logger = logging.getLogger(__name__) @@ -85,8 +85,7 @@ class BaseCohere(Serializable): user_agent: str = "langchain" """Identifier for the application making the request.""" - @model_validator() - @classmethod + @root_validator() def validate_environment(cls, values: Dict) -> Dict: """Validate that api key and python package exists in environment.""" try: @@ -146,7 +145,11 @@ class Cohere(LLM, BaseCohere): max_retries: int = 10 """Maximum number of retries to make when generating.""" - model_config = ConfigDict(extra="forbid") + + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid @property def _default_params(self) -> Dict[str, Any]: diff --git a/swarms/models/dalle3.py b/swarms/models/dalle3.py index 17790c74..40f63418 100644 --- a/swarms/models/dalle3.py +++ b/swarms/models/dalle3.py @@ -13,7 +13,7 @@ from cachetools import TTLCache from dotenv import load_dotenv from openai import OpenAI from PIL import Image -from pydantic import field_validator +from pydantic import validator from termcolor import colored load_dotenv() @@ -92,8 +92,7 @@ class Dalle3: arbitrary_types_allowed = True - @field_validator("max_retries", "time_seconds") - @classmethod + @validator("max_retries", "time_seconds") def must_be_positive(cls, value): if value <= 0: raise ValueError("Must be positive") diff --git a/swarms/models/eleven_labs.py b/swarms/models/eleven_labs.py index 759c65bb..2d55e864 100644 --- a/swarms/models/eleven_labs.py +++ b/swarms/models/eleven_labs.py @@ -3,7 +3,7 @@ from enum import Enum from typing import Any, Dict, Union from langchain.utils import get_from_dict_or_env -from pydantic import model_validator +from pydantic import root_validator from swarms.tools.tool import BaseTool @@ -59,8 +59,7 @@ class ElevenLabsText2SpeechTool(BaseTool): " Italian, French, Portuguese, and Hindi. " ) - @model_validator(mode="before") - @classmethod + @root_validator(pre=True) def validate_environment(cls, values: Dict) -> Dict: """Validate that api key exists in environment.""" _ = get_from_dict_or_env( diff --git a/swarms/models/fastvit.py b/swarms/models/fastvit.py index f3b60587..a6fc31f8 100644 --- a/swarms/models/fastvit.py +++ b/swarms/models/fastvit.py @@ -20,8 +20,6 @@ class ClassificationResult(BaseModel): class_id: List[StrictInt] confidence: List[StrictFloat] - # TODO[pydantic]: We couldn't refactor the `validator`, please replace it by `field_validator` manually. - # Check https://docs.pydantic.dev/dev-v2/migration/#changes-to-validators for more information. @validator("class_id", "confidence", pre=True, each_item=True) def check_list_contents(cls, v): assert isinstance(v, int) or isinstance( diff --git a/swarms/models/kosmos2.py b/swarms/models/kosmos2.py index d251ea23..9a9a0de3 100644 --- a/swarms/models/kosmos2.py +++ b/swarms/models/kosmos2.py @@ -20,8 +20,6 @@ class Detections(BaseModel): ), "All fields must have the same length." return values - # TODO[pydantic]: We couldn't refactor the `validator`, please replace it by `field_validator` manually. - # Check https://docs.pydantic.dev/dev-v2/migration/#changes-to-validators for more information. @validator( "xyxy", "class_id", "confidence", pre=True, each_item=True ) diff --git a/swarms/models/openai_embeddings.py b/swarms/models/openai_embeddings.py index 3265a141..0cbbdbee 100644 --- a/swarms/models/openai_embeddings.py +++ b/swarms/models/openai_embeddings.py @@ -16,7 +16,7 @@ from typing import ( ) import numpy as np -from pydantic import model_validator, ConfigDict, BaseModel, Field +from pydantic import BaseModel, Extra, Field, root_validator from tenacity import ( AsyncRetrying, before_sleep_log, @@ -186,7 +186,7 @@ class OpenAIEmbeddings(BaseModel, Embeddings): """ - client: Any = None #: :meta private: + client: Any #: :meta private: model: str = "text-embedding-ada-002" deployment: str = model # to support Azure OpenAI Service custom deployment names openai_api_version: Optional[str] = None @@ -227,10 +227,13 @@ class OpenAIEmbeddings(BaseModel, Embeddings): """Whether to show a progress bar when embedding.""" model_kwargs: Dict[str, Any] = Field(default_factory=dict) """Holds any model parameters valid for `create` call not explicitly specified.""" - model_config = ConfigDict(extra="forbid") - @model_validator(mode="before") - @classmethod + class Config: + """Configuration for this pydantic object.""" + + extra = Extra.forbid + + @root_validator(pre=True) def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]: """Build extra kwargs from additional params that were passed in.""" all_required_field_names = get_pydantic_field_names(cls) @@ -261,8 +264,7 @@ class OpenAIEmbeddings(BaseModel, Embeddings): values["model_kwargs"] = extra return values - @model_validator() - @classmethod + @root_validator() def validate_environment(cls, values: Dict) -> Dict: """Validate that api key and python package exists in environment.""" values["openai_api_key"] = get_from_dict_or_env( diff --git a/swarms/models/openai_function_caller.py b/swarms/models/openai_function_caller.py index feb04387..6542e457 100644 --- a/swarms/models/openai_function_caller.py +++ b/swarms/models/openai_function_caller.py @@ -2,7 +2,7 @@ from typing import Any, Dict, List, Optional, Union import openai import requests -from pydantic import field_validator, BaseModel +from pydantic import BaseModel, validator from tenacity import ( retry, stop_after_attempt, @@ -78,8 +78,7 @@ class FunctionSpecification(BaseModel): parameters: Dict[str, Any] required: Optional[List[str]] = None - @field_validator("parameters") - @classmethod + @validator("parameters") def check_parameters(cls, params): if not isinstance(params, dict): raise ValueError("Parameters must be a dictionary.") diff --git a/swarms/models/openai_models.py b/swarms/models/openai_models.py index 233b99c3..14332ff2 100644 --- a/swarms/models/openai_models.py +++ b/swarms/models/openai_models.py @@ -38,7 +38,6 @@ from importlib.metadata import version from packaging.version import parse - logger = logging.getLogger(__name__) @@ -249,8 +248,12 @@ class BaseOpenAI(BaseLLM): data.get("model_name", "") return super().__new__(cls) - - @classmethod + class Config: + """Configuration for this pydantic object.""" + + allow_population_by_field_name = True + + @root_validator(pre=True) def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]: """Build extra kwargs from additional params that were passed in.""" all_required_field_names = get_pydantic_field_names(cls) @@ -260,8 +263,7 @@ class BaseOpenAI(BaseLLM): ) return values - - @classmethod + @root_validator() def validate_environment(cls, values: Dict) -> Dict: """Validate that api key and python package exists in environment.""" values["openai_api_key"] = get_from_dict_or_env( @@ -756,7 +758,7 @@ class AzureOpenAI(BaseOpenAI): openai_api_type: str = "" openai_api_version: str = "" - @classmethod + @root_validator() def validate_azure_settings(cls, values: Dict) -> Dict: values["openai_api_version"] = get_from_dict_or_env( values, @@ -845,7 +847,7 @@ class OpenAIChat(BaseLLM): disallowed_special: Union[Literal["all"], Collection[str]] = "all" """Set of special tokens that are not allowed。""" - @classmethod + @root_validator(pre=True) def build_extra(cls, values: Dict[str, Any]) -> Dict[str, Any]: """Build extra kwargs from additional params that were passed in.""" all_required_field_names = { @@ -863,8 +865,7 @@ class OpenAIChat(BaseLLM): values["model_kwargs"] = extra return values - - @classmethod + @root_validator() def validate_environment(cls, values: Dict) -> Dict: """Validate that api key and python package exists in environment.""" openai_api_key = get_from_dict_or_env( diff --git a/swarms/models/palm.py b/swarms/models/palm.py index e016a776..d61d4856 100644 --- a/swarms/models/palm.py +++ b/swarms/models/palm.py @@ -15,7 +15,6 @@ from tenacity import ( stop_after_attempt, wait_exponential, ) -from pydantic import model_validator logger = logging.getLogger(__name__) @@ -105,8 +104,7 @@ class GooglePalm(BaseLLM, BaseModel): """Number of chat completions to generate for each prompt. Note that the API may not return the full n completions if duplicates are generated.""" - @model_validator() - @classmethod + @root_validator() def validate_environment(cls, values: Dict) -> Dict: """Validate api key, python package exists.""" google_api_key = get_from_dict_or_env( diff --git a/swarms/models/ssd_1b.py b/swarms/models/ssd_1b.py index 9a905bd4..d3b9086b 100644 --- a/swarms/models/ssd_1b.py +++ b/swarms/models/ssd_1b.py @@ -9,7 +9,7 @@ import backoff import torch from diffusers import StableDiffusionXLPipeline from PIL import Image -from pydantic import field_validator +from pydantic import validator from termcolor import colored from cachetools import TTLCache @@ -72,8 +72,7 @@ class SSD1B: arbitrary_types_allowed = True - @field_validator("max_retries", "time_seconds") - @classmethod + @validator("max_retries", "time_seconds") def must_be_positive(cls, value): if value <= 0: raise ValueError("Must be positive") diff --git a/swarms/models/timm.py b/swarms/models/timm.py index 8dec0bc9..d1c42165 100644 --- a/swarms/models/timm.py +++ b/swarms/models/timm.py @@ -2,14 +2,17 @@ from typing import List import timm import torch -from pydantic import ConfigDict, BaseModel +from pydantic import BaseModel class TimmModelInfo(BaseModel): model_name: str pretrained: bool in_chans: int - model_config = ConfigDict(strict=True) + + class Config: + # Use strict typing for all fields + strict = True class TimmModel: diff --git a/swarms/tools/tool.py b/swarms/tools/tool.py index 838b89bb..1029a183 100644 --- a/swarms/tools/tool.py +++ b/swarms/tools/tool.py @@ -29,7 +29,14 @@ from langchain.callbacks.manager import ( ) from langchain.load.serializable import Serializable - +from pydantic import ( + BaseModel, + Extra, + Field, + create_model, + root_validator, + validate_arguments, +) from langchain.schema.runnable import ( Runnable, RunnableConfig, @@ -41,9 +48,62 @@ class SchemaAnnotationError(TypeError): """Raised when 'args_schema' is missing or has an incorrect type annotation.""" +def _create_subset_model( + name: str, model: BaseModel, field_names: list +) -> Type[BaseModel]: + """Create a pydantic model with only a subset of model's fields.""" + fields = {} + for field_name in field_names: + field = model.__fields__[field_name] + fields[field_name] = (field.outer_type_, field.field_info) + return create_model(name, **fields) # type: ignore + +def _get_filtered_args( + inferred_model: Type[BaseModel], + func: Callable, +) -> dict: + """Get the arguments from a function's signature.""" + schema = inferred_model.schema()["properties"] + valid_keys = signature(func).parameters + return { + k: schema[k] + for k in valid_keys + if k not in ("run_manager", "callbacks") + } +class _SchemaConfig: + """Configuration for the pydantic model.""" + + extra: Any = Extra.forbid + arbitrary_types_allowed: bool = True + + +def create_schema_from_function( + model_name: str, + func: Callable, +) -> Type[BaseModel]: + """Create a pydantic schema from a function's signature. + Args: + model_name: Name to assign to the generated pydandic schema + func: Function to generate the schema from + Returns: + A pydantic model with the same arguments as the function + """ + # https://docs.pydantic.dev/latest/usage/validation_decorator/ + validated = validate_arguments(func, config=_SchemaConfig) # type: ignore + inferred_model = validated.model # type: ignore + if "run_manager" in inferred_model.__fields__: + del inferred_model.__fields__["run_manager"] + if "callbacks" in inferred_model.__fields__: + del inferred_model.__fields__["callbacks"] + # Pydantic adds placeholder virtual fields we need to strip + valid_properties = _get_filtered_args(inferred_model, func) + return _create_subset_model( + f"{model_name}Schema", inferred_model, list(valid_properties) + ) + class ToolException(Exception): """An optional exception that tool throws when execution error occurs. @@ -71,7 +131,7 @@ class BaseTool(RunnableSerializable[Union[str, Dict], Any]): if args_schema_type is not None: if ( args_schema_type is None - # or args_schema_type == BaseModel + or args_schema_type == BaseModel ): # Throw errors for common mis-annotations. # TODO: Use get_args / get_origin and fully @@ -108,10 +168,11 @@ class ChildTool(BaseTool): verbose: bool = False """Whether to log the tool's progress.""" - callbacks: Callbacks = None + callbacks: Callbacks = Field(default=None, exclude=True) """Callbacks to be called during tool execution.""" - # TODO: I don't know how to remove Field here - callback_manager: Optional[BaseCallbackManager] = None + callback_manager: Optional[BaseCallbackManager] = Field( + default=None, exclude=True + ) """Deprecated. Please use callbacks instead.""" tags: Optional[List[str]] = None """Optional list of tags associated with the tool. Defaults to None @@ -131,11 +192,9 @@ class ChildTool(BaseTool): ] = False """Handle the content of the ToolException thrown.""" - # TODO[pydantic]: The `Config` class inherits from another class, please create the `model_config` manually. - # Check https://docs.pydantic.dev/dev-v2/migration/#changes-to-config for more information. class Config(Serializable.Config): """Configuration for this pydantic object.""" - model_config = {} + arbitrary_types_allowed = True @property @@ -155,8 +214,7 @@ class ChildTool(BaseTool): # --- Runnable --- @property - # TODO - def input_schema(self): + def input_schema(self) -> Type[BaseModel]: """The tool's input schema.""" if self.args_schema is not None: return self.args_schema @@ -218,7 +276,7 @@ class ChildTool(BaseTool): } return tool_input - @classmethod + @root_validator() def raise_deprecation(cls, values: Dict) -> Dict: """Raise deprecation warning if callback_manager is used.""" if values.get("callback_manager") is not None: @@ -613,7 +671,9 @@ class StructuredTool(BaseTool): """Tool that can operate on any number of inputs.""" description: str = "" - + args_schema: Type[BaseModel] = Field( + ..., description="The tool schema." + ) """The input arguments' schema.""" func: Optional[Callable[..., Any]] """The function to run when the tool is called.""" diff --git a/swarms/utils/serializable.py b/swarms/utils/serializable.py index 3cc3a5f6..de9444ef 100644 --- a/swarms/utils/serializable.py +++ b/swarms/utils/serializable.py @@ -1,7 +1,7 @@ from abc import ABC from typing import Any, Dict, List, Literal, TypedDict, Union, cast -from pydantic import ConfigDict, BaseModel, PrivateAttr +from pydantic import BaseModel, PrivateAttr class BaseSerialized(TypedDict): @@ -64,7 +64,9 @@ class Serializable(BaseModel, ABC): constructor. """ return {} - model_config = ConfigDict(extra="ignore") + + class Config: + extra = "ignore" _lc_kwargs = PrivateAttr(default_factory=dict) diff --git a/tests/Dockerfile b/tests/Dockerfile index e28fbc8e..f6e46515 100644 --- a/tests/Dockerfile +++ b/tests/Dockerfile @@ -2,8 +2,6 @@ # -================== # Use an official Python runtime as a parent image FROM python:3.9-slim -RUN apt-get update && apt-get -y install libgl1-mesa-dev libglib2.0-0; apt-get clean -RUN pip install opencv-contrib-python-headless # Set environment variables to make Python output unbuffered and disable the PIP cache ENV PYTHONDONTWRITEBYTECODE 1 From b90be0c95100490b38a9339fde5a083f0a9551eb Mon Sep 17 00:00:00 2001 From: Kye Date: Tue, 5 Dec 2023 11:55:17 -0800 Subject: [PATCH 27/32] pyproject toml --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 0ed3e85f..53592d35 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -24,7 +24,7 @@ classifiers = [ [tool.poetry.dependencies] python = "^3.9.1" torch = "2.1.1" -transformers > 2.10 +transformers = "2.10" openai = "0.28.0" langchain = "*" asyncio = "*" From 57daf25db4451bfe80f94796089ad7e7fbae0d11 Mon Sep 17 00:00:00 2001 From: Kye Date: Tue, 5 Dec 2023 11:58:17 -0800 Subject: [PATCH 28/32] [EXAMPLES FIX] --- README.md | 7 +++++-- example.py | 1 + sequential_workflow_example.py | 6 ++++-- 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/README.md b/README.md index c9441685..3ce706e6 100644 --- a/README.md +++ b/README.md @@ -50,7 +50,9 @@ api_key = os.environ.get("OPENAI_API_KEY") # Initialize the language model llm = OpenAIChat( temperature=0.5, + model_name="gpt-4", openai_api_key=api_key, + max_tokens=4000 ) @@ -86,9 +88,10 @@ api_key = os.getenv("OPENAI_API_KEY") # Initialize the language agent llm = OpenAIChat( - openai_api_key=api_key, temperature=0.5, - max_tokens=3000, + model_name="gpt-4", + openai_api_key=api_key, + max_tokens=4000 ) diff --git a/example.py b/example.py index 9fc89485..6ed2cbab 100644 --- a/example.py +++ b/example.py @@ -15,6 +15,7 @@ api_key = os.environ.get("OPENAI_API_KEY") # Initialize the language model llm = OpenAIChat( temperature=0.5, + model_name="gpt-4", openai_api_key=api_key, ) diff --git a/sequential_workflow_example.py b/sequential_workflow_example.py index 1742f49c..38cf5559 100644 --- a/sequential_workflow_example.py +++ b/sequential_workflow_example.py @@ -11,10 +11,12 @@ api_key = os.getenv("OPENAI_API_KEY") # Initialize the language agent +# Initialize the language model llm = OpenAIChat( - openai_api_key=api_key, temperature=0.5, - max_tokens=2000, + model_name="gpt-4", + openai_api_key=api_key, + max_tokens=4000 ) From f08698fc3a3470f65bc00df4f791512d0af55748 Mon Sep 17 00:00:00 2001 From: Kye Date: Tue, 5 Dec 2023 12:12:32 -0800 Subject: [PATCH 29/32] [FIX][Zephyr] --- .env.example | 3 ++ swarms/models/__init__.py | 4 +-- tests/upload_tests_to_issues | 58 ++++++++++++++++++++++++++++++++++++ 3 files changed, 63 insertions(+), 2 deletions(-) create mode 100644 tests/upload_tests_to_issues diff --git a/.env.example b/.env.example index bebc8fa3..23f4d87b 100644 --- a/.env.example +++ b/.env.example @@ -42,3 +42,6 @@ PINECONE_API_KEY="" BING_COOKIE="" PSG_CONNECTION_STRING="" +GITHUB_USERNAME="" +GITHUB_REPO_NAME="" +GITHUB_TOKEN="" \ No newline at end of file diff --git a/swarms/models/__init__.py b/swarms/models/__init__.py index d2256aa8..089585a8 100644 --- a/swarms/models/__init__.py +++ b/swarms/models/__init__.py @@ -8,7 +8,7 @@ from swarms.models.openai_models import ( AzureOpenAI, OpenAIChat, ) # noqa: E402 -from swarms.models.zephyr import Zephyr # noqa: E402 +# from swarms.models.zephyr import Zephyr # noqa: E402 from swarms.models.biogpt import BioGPT # noqa: E402 from swarms.models.huggingface import HuggingfaceLLM # noqa: E402 from swarms.models.wizard_storytelling import ( @@ -42,7 +42,7 @@ __all__ = [ "OpenAI", "AzureOpenAI", "OpenAIChat", - "Zephyr", + # "Zephyr", "BaseMultiModalModel", "Idefics", # "Kosmos", diff --git a/tests/upload_tests_to_issues b/tests/upload_tests_to_issues new file mode 100644 index 00000000..cc2392e3 --- /dev/null +++ b/tests/upload_tests_to_issues @@ -0,0 +1,58 @@ +import os +import subprocess +import json +import re +import requests +from dotenv import load_dotenv + +load_dotenv + +# Constants +GITHUB_USERNAME = os.getenv('GITHUB_USERNAME') +REPO_NAME = os.getenv('GITHUB_REPO_NAME') +GITHUB_TOKEN = os.getenv('GITHUB_TOKEN') +ISSUES_URL = f'https://api.github.com/repos/{GITHUB_USERNAME}/{REPO_NAME}/issues' + +# Headers for authentication +headers = { + 'Authorization': f'token {GITHUB_TOKEN}', + 'Accept': 'application/vnd.github.v3+json' +} + +def run_pytest(): + result = subprocess.run(['pytest'], capture_output=True, text=True) + return result.stdout + result.stderr + +def parse_pytest_output(output): + errors = [] + current_error = None + + for line in output.split('\n'): + if line.startswith('_________________________'): + if current_error: + errors.append(current_error) + current_error = {'title': '', 'body': ''} + elif current_error is not None: + if not current_error['title']: + current_error['title'] = line.strip() + current_error['body'] += line + '\n' + + if current_error: + errors.append(current_error) + return errors + +def create_github_issue(title, body): + issue = {'title': title, 'body': body} + response = requests.post(ISSUES_URL, headers=headers, json=issue) + return response.json() + +def main(): + pytest_output = run_pytest() + errors = parse_pytest_output(pytest_output) + + for error in errors: + issue_response = create_github_issue(error['title'], error['body']) + print(f"Issue created: {issue_response.get('html_url')}") + +if __name__ == '__main__': + main() From 5ff74165c0dbd38e157dc601ac92d9c59df5ae9c Mon Sep 17 00:00:00 2001 From: Kye Date: Tue, 5 Dec 2023 12:25:31 -0800 Subject: [PATCH 30/32] [EXAMPLES] --- pyproject.toml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 53592d35..c182abd9 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "poetry.core.masonry.api" [tool.poetry] name = "swarms" -version = "2.5.7" +version = "2.6.0" description = "Swarms - Pytorch" license = "MIT" authors = ["Kye Gomez "] @@ -34,6 +34,7 @@ google-generativeai = "*" langchain-experimental = "*" playwright = "*" duckduckgo-search = "*" +opencv-python-headless = "*" faiss-cpu = "*" backoff = "*" marshmallow = "*" From d5d3381d201208f5ae05508b79e4ba9cfed85e2f Mon Sep 17 00:00:00 2001 From: Sashin Date: Tue, 5 Dec 2023 22:32:51 +0200 Subject: [PATCH 31/32] implementing chroma --- swarms/memory/chroma.py | 703 ---------------------------------------- 1 file changed, 703 deletions(-) diff --git a/swarms/memory/chroma.py b/swarms/memory/chroma.py index 67ba4cb2..e69de29b 100644 --- a/swarms/memory/chroma.py +++ b/swarms/memory/chroma.py @@ -1,703 +0,0 @@ -from __future__ import annotations - -import logging -import uuid -from typing import ( - TYPE_CHECKING, - Any, - Callable, - Dict, - Iterable, - List, - Optional, - Tuple, - Type, -) - -import numpy as np - -from swarms.structs.document import Document -from swarms.models.embeddings_base import Embeddings -from langchain.schema.vectorstore import VectorStore -from langchain.utils import xor_args -from langchain.vectorstores.utils import maximal_marginal_relevance - -if TYPE_CHECKING: - import chromadb - import chromadb.config - from chromadb.api.types import ID, OneOrMany, Where, WhereDocument - -logger = logging.getLogger() -DEFAULT_K = 4 # Number of Documents to return. - - -def _results_to_docs(results: Any) -> List[Document]: - return [doc for doc, _ in _results_to_docs_and_scores(results)] - - -def _results_to_docs_and_scores(results: Any) -> List[Tuple[Document, float]]: - return [ - # TODO: Chroma can do batch querying, - # we shouldn't hard code to the 1st result - (Document(page_content=result[0], metadata=result[1] or {}), result[2]) - for result in zip( - results["documents"][0], - results["metadatas"][0], - results["distances"][0], - ) - ] - - -class Chroma(VectorStore): - """`ChromaDB` vector store. - - To use, you should have the ``chromadb`` python package installed. - - Example: - .. code-block:: python - - from langchain.vectorstores import Chroma - from langchain.embeddings.openai import OpenAIEmbeddings - - embeddings = OpenAIEmbeddings() - vectorstore = Chroma("langchain_store", embeddings) - """ - - _LANGCHAIN_DEFAULT_COLLECTION_NAME = "langchain" - - def __init__( - self, - collection_name: str = _LANGCHAIN_DEFAULT_COLLECTION_NAME, - embedding_function: Optional[Embeddings] = None, - persist_directory: Optional[str] = None, - client_settings: Optional[chromadb.config.Settings] = None, - collection_metadata: Optional[Dict] = None, - client: Optional[chromadb.Client] = None, - relevance_score_fn: Optional[Callable[[float], float]] = None, - ) -> None: - """Initialize with a Chroma client.""" - try: - import chromadb - import chromadb.config - except ImportError: - raise ImportError( - "Could not import chromadb python package. " - "Please install it with `pip install chromadb`." - ) - - if client is not None: - self._client_settings = client_settings - self._client = client - self._persist_directory = persist_directory - else: - if client_settings: - # If client_settings is provided with persist_directory specified, - # then it is "in-memory and persisting to disk" mode. - client_settings.persist_directory = ( - persist_directory or client_settings.persist_directory - ) - if client_settings.persist_directory is not None: - # Maintain backwards compatibility with chromadb < 0.4.0 - major, minor, _ = chromadb.__version__.split(".") - if int(major) == 0 and int(minor) < 4: - client_settings.chroma_db_impl = "duckdb+parquet" - - _client_settings = client_settings - elif persist_directory: - # Maintain backwards compatibility with chromadb < 0.4.0 - major, minor, _ = chromadb.__version__.split(".") - if int(major) == 0 and int(minor) < 4: - _client_settings = chromadb.config.Settings( - chroma_db_impl="duckdb+parquet", - ) - else: - _client_settings = chromadb.config.Settings(is_persistent=True) - _client_settings.persist_directory = persist_directory - else: - _client_settings = chromadb.config.Settings() - self._client_settings = _client_settings - self._client = chromadb.Client(_client_settings) - self._persist_directory = ( - _client_settings.persist_directory or persist_directory - ) - - self._embedding_function = embedding_function - self._collection = self._client.get_or_create_collection( - name=collection_name, - embedding_function=self._embedding_function.embed_documents - if self._embedding_function is not None - else None, - metadata=collection_metadata, - ) - self.override_relevance_score_fn = relevance_score_fn - - @property - def embeddings(self) -> Optional[Embeddings]: - return self._embedding_function - - @xor_args(("query_texts", "query_embeddings")) - def __query_collection( - self, - query_texts: Optional[List[str]] = None, - query_embeddings: Optional[List[List[float]]] = None, - n_results: int = 4, - where: Optional[Dict[str, str]] = None, - where_document: Optional[Dict[str, str]] = None, - **kwargs: Any, - ) -> List[Document]: - """Query the chroma collection.""" - try: - import chromadb # noqa: F401 - except ImportError: - raise ValueError( - "Could not import chromadb python package. " - "Please install it with `pip install chromadb`." - ) - return self._collection.query( - query_texts=query_texts, - query_embeddings=query_embeddings, - n_results=n_results, - where=where, - where_document=where_document, - **kwargs, - ) - - def add_texts( - self, - texts: Iterable[str], - metadatas: Optional[List[dict]] = None, - ids: Optional[List[str]] = None, - **kwargs: Any, - ) -> List[str]: - """Run more texts through the embeddings and add to the vectorstore. - - Args: - texts (Iterable[str]): Texts to add to the vectorstore. - metadatas (Optional[List[dict]], optional): Optional list of metadatas. - ids (Optional[List[str]], optional): Optional list of IDs. - - Returns: - List[str]: List of IDs of the added texts. - """ - # TODO: Handle the case where the user doesn't provide ids on the Collection - if ids is None: - ids = [str(uuid.uuid1()) for _ in texts] - embeddings = None - texts = list(texts) - if self._embedding_function is not None: - embeddings = self._embedding_function.embed_documents(texts) - if metadatas: - # fill metadatas with empty dicts if somebody - # did not specify metadata for all texts - length_diff = len(texts) - len(metadatas) - if length_diff: - metadatas = metadatas + [{}] * length_diff - empty_ids = [] - non_empty_ids = [] - for idx, m in enumerate(metadatas): - if m: - non_empty_ids.append(idx) - else: - empty_ids.append(idx) - if non_empty_ids: - metadatas = [metadatas[idx] for idx in non_empty_ids] - texts_with_metadatas = [texts[idx] for idx in non_empty_ids] - embeddings_with_metadatas = ( - [embeddings[idx] for idx in non_empty_ids] if embeddings else None - ) - ids_with_metadata = [ids[idx] for idx in non_empty_ids] - try: - self._collection.upsert( - metadatas=metadatas, - embeddings=embeddings_with_metadatas, - documents=texts_with_metadatas, - ids=ids_with_metadata, - ) - except ValueError as e: - if "Expected metadata value to be" in str(e): - msg = ( - "Try filtering complex metadata from the document using " - "langchain.vectorstores.utils.filter_complex_metadata." - ) - raise ValueError(e.args[0] + "\n\n" + msg) - else: - raise e - if empty_ids: - texts_without_metadatas = [texts[j] for j in empty_ids] - embeddings_without_metadatas = ( - [embeddings[j] for j in empty_ids] if embeddings else None - ) - ids_without_metadatas = [ids[j] for j in empty_ids] - self._collection.upsert( - embeddings=embeddings_without_metadatas, - documents=texts_without_metadatas, - ids=ids_without_metadatas, - ) - else: - self._collection.upsert( - embeddings=embeddings, - documents=texts, - ids=ids, - ) - return ids - - def similarity_search( - self, - query: str, - k: int = DEFAULT_K, - filter: Optional[Dict[str, str]] = None, - **kwargs: Any, - ) -> List[Document]: - """Run similarity search with Chroma. - - Args: - query (str): Query text to search for. - k (int): Number of results to return. Defaults to 4. - filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None. - - Returns: - List[Document]: List of documents most similar to the query text. - """ - docs_and_scores = self.similarity_search_with_score(query, k, filter=filter) - return [doc for doc, _ in docs_and_scores] - - def similarity_search_by_vector( - self, - embedding: List[float], - k: int = DEFAULT_K, - filter: Optional[Dict[str, str]] = None, - where_document: Optional[Dict[str, str]] = None, - **kwargs: Any, - ) -> List[Document]: - """Return docs most similar to embedding vector. - Args: - embedding (List[float]): Embedding to look up documents similar to. - k (int): Number of Documents to return. Defaults to 4. - filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None. - Returns: - List of Documents most similar to the query vector. - """ - results = self.__query_collection( - query_embeddings=embedding, - n_results=k, - where=filter, - where_document=where_document, - ) - return _results_to_docs(results) - - def similarity_search_by_vector_with_relevance_scores( - self, - embedding: List[float], - k: int = DEFAULT_K, - filter: Optional[Dict[str, str]] = None, - where_document: Optional[Dict[str, str]] = None, - **kwargs: Any, - ) -> List[Tuple[Document, float]]: - """ - Return docs most similar to embedding vector and similarity score. - - Args: - embedding (List[float]): Embedding to look up documents similar to. - k (int): Number of Documents to return. Defaults to 4. - filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None. - - Returns: - List[Tuple[Document, float]]: List of documents most similar to - the query text and cosine distance in float for each. - Lower score represents more similarity. - """ - results = self.__query_collection( - query_embeddings=embedding, - n_results=k, - where=filter, - where_document=where_document, - ) - return _results_to_docs_and_scores(results) - - def similarity_search_with_score( - self, - query: str, - k: int = DEFAULT_K, - filter: Optional[Dict[str, str]] = None, - where_document: Optional[Dict[str, str]] = None, - **kwargs: Any, - ) -> List[Tuple[Document, float]]: - """Run similarity search with Chroma with distance. - - Args: - query (str): Query text to search for. - k (int): Number of results to return. Defaults to 4. - filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None. - - Returns: - List[Tuple[Document, float]]: List of documents most similar to - the query text and cosine distance in float for each. - Lower score represents more similarity. - """ - if self._embedding_function is None: - results = self.__query_collection( - query_texts=[query], - n_results=k, - where=filter, - where_document=where_document, - ) - else: - query_embedding = self._embedding_function.embed_query(query) - results = self.__query_collection( - query_embeddings=[query_embedding], - n_results=k, - where=filter, - where_document=where_document, - ) - - return _results_to_docs_and_scores(results) - - def _select_relevance_score_fn(self) -> Callable[[float], float]: - """ - The 'correct' relevance function - may differ depending on a few things, including: - - the distance / similarity metric used by the VectorStore - - the scale of your embeddings (OpenAI's are unit normed. Many others are not!) - - embedding dimensionality - - etc. - """ - if self.override_relevance_score_fn: - return self.override_relevance_score_fn - - distance = "l2" - distance_key = "hnsw:space" - metadata = self._collection.metadata - - if metadata and distance_key in metadata: - distance = metadata[distance_key] - - if distance == "cosine": - return self._cosine_relevance_score_fn - elif distance == "l2": - return self._euclidean_relevance_score_fn - elif distance == "ip": - return self._max_inner_product_relevance_score_fn - else: - raise ValueError( - "No supported normalization function" - f" for distance metric of type: {distance}." - "Consider providing relevance_score_fn to Chroma constructor." - ) - - def max_marginal_relevance_search_by_vector( - self, - embedding: List[float], - k: int = DEFAULT_K, - fetch_k: int = 20, - lambda_mult: float = 0.5, - filter: Optional[Dict[str, str]] = None, - where_document: Optional[Dict[str, str]] = None, - **kwargs: Any, - ) -> List[Document]: - """Return docs selected using the maximal marginal relevance. - Maximal marginal relevance optimizes for similarity to query AND diversity - among selected documents. - - Args: - embedding: Embedding to look up documents similar to. - k: Number of Documents to return. Defaults to 4. - fetch_k: Number of Documents to fetch to pass to MMR algorithm. - lambda_mult: Number between 0 and 1 that determines the degree - of diversity among the results with 0 corresponding - to maximum diversity and 1 to minimum diversity. - Defaults to 0.5. - filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None. - - Returns: - List of Documents selected by maximal marginal relevance. - """ - - results = self.__query_collection( - query_embeddings=embedding, - n_results=fetch_k, - where=filter, - where_document=where_document, - include=["metadatas", "documents", "distances", "embeddings"], - ) - mmr_selected = maximal_marginal_relevance( - np.array(embedding, dtype=np.float32), - results["embeddings"][0], - k=k, - lambda_mult=lambda_mult, - ) - - candidates = _results_to_docs(results) - - selected_results = [r for i, r in enumerate(candidates) if i in mmr_selected] - return selected_results - - def max_marginal_relevance_search( - self, - query: str, - k: int = DEFAULT_K, - fetch_k: int = 20, - lambda_mult: float = 0.5, - filter: Optional[Dict[str, str]] = None, - where_document: Optional[Dict[str, str]] = None, - **kwargs: Any, - ) -> List[Document]: - """Return docs selected using the maximal marginal relevance. - Maximal marginal relevance optimizes for similarity to query AND diversity - among selected documents. - - Args: - query: Text to look up documents similar to. - k: Number of Documents to return. Defaults to 4. - fetch_k: Number of Documents to fetch to pass to MMR algorithm. - lambda_mult: Number between 0 and 1 that determines the degree - of diversity among the results with 0 corresponding - to maximum diversity and 1 to minimum diversity. - Defaults to 0.5. - filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None. - - Returns: - List of Documents selected by maximal marginal relevance. - """ - if self._embedding_function is None: - raise ValueError( - "For MMR search, you must specify an embedding function oncreation." - ) - - embedding = self._embedding_function.embed_query(query) - docs = self.max_marginal_relevance_search_by_vector( - embedding, - k, - fetch_k, - lambda_mult=lambda_mult, - filter=filter, - where_document=where_document, - ) - return docs - - def delete_collection(self) -> None: - """Delete the collection.""" - self._client.delete_collection(self._collection.name) - - def get( - self, - ids: Optional[OneOrMany[ID]] = None, - where: Optional[Where] = None, - limit: Optional[int] = None, - offset: Optional[int] = None, - where_document: Optional[WhereDocument] = None, - include: Optional[List[str]] = None, - ) -> Dict[str, Any]: - """Gets the collection. - - Args: - ids: The ids of the embeddings to get. Optional. - where: A Where type dict used to filter results by. - E.g. `{"color" : "red", "price": 4.20}`. Optional. - limit: The number of documents to return. Optional. - offset: The offset to start returning results from. - Useful for paging results with limit. Optional. - where_document: A WhereDocument type dict used to filter by the documents. - E.g. `{$contains: "hello"}`. Optional. - include: A list of what to include in the results. - Can contain `"embeddings"`, `"metadatas"`, `"documents"`. - Ids are always included. - Defaults to `["metadatas", "documents"]`. Optional. - """ - kwargs = { - "ids": ids, - "where": where, - "limit": limit, - "offset": offset, - "where_document": where_document, - } - - if include is not None: - kwargs["include"] = include - - return self._collection.get(**kwargs) - - def persist(self) -> None: - """Persist the collection. - - This can be used to explicitly persist the data to disk. - It will also be called automatically when the object is destroyed. - """ - if self._persist_directory is None: - raise ValueError( - "You must specify a persist_directory on" - "creation to persist the collection." - ) - import chromadb - - # Maintain backwards compatibility with chromadb < 0.4.0 - major, minor, _ = chromadb.__version__.split(".") - if int(major) == 0 and int(minor) < 4: - self._client.persist() - - def update_document(self, document_id: str, document: Document) -> None: - """Update a document in the collection. - - Args: - document_id (str): ID of the document to update. - document (Document): Document to update. - """ - return self.update_documents([document_id], [document]) - - def update_documents(self, ids: List[str], documents: List[Document]) -> None: - """Update a document in the collection. - - Args: - ids (List[str]): List of ids of the document to update. - documents (List[Document]): List of documents to update. - """ - text = [document.page_content for document in documents] - metadata = [document.metadata for document in documents] - if self._embedding_function is None: - raise ValueError( - "For update, you must specify an embedding function on creation." - ) - embeddings = self._embedding_function.embed_documents(text) - - if hasattr( - self._collection._client, "max_batch_size" - ): # for Chroma 0.4.10 and above - from chromadb.utils.batch_utils import create_batches - - for batch in create_batches( - api=self._collection._client, - ids=ids, - metadatas=metadata, - documents=text, - embeddings=embeddings, - ): - self._collection.update( - ids=batch[0], - embeddings=batch[1], - documents=batch[3], - metadatas=batch[2], - ) - else: - self._collection.update( - ids=ids, - embeddings=embeddings, - documents=text, - metadatas=metadata, - ) - - @classmethod - def from_texts( - cls: Type[Chroma], - texts: List[str], - embedding: Optional[Embeddings] = None, - metadatas: Optional[List[dict]] = None, - ids: Optional[List[str]] = None, - collection_name: str = _LANGCHAIN_DEFAULT_COLLECTION_NAME, - persist_directory: Optional[str] = None, - client_settings: Optional[chromadb.config.Settings] = None, - client: Optional[chromadb.Client] = None, - collection_metadata: Optional[Dict] = None, - **kwargs: Any, - ) -> Chroma: - """Create a Chroma vectorstore from a raw documents. - - If a persist_directory is specified, the collection will be persisted there. - Otherwise, the data will be ephemeral in-memory. - - Args: - texts (List[str]): List of texts to add to the collection. - collection_name (str): Name of the collection to create. - persist_directory (Optional[str]): Directory to persist the collection. - embedding (Optional[Embeddings]): Embedding function. Defaults to None. - metadatas (Optional[List[dict]]): List of metadatas. Defaults to None. - ids (Optional[List[str]]): List of document IDs. Defaults to None. - client_settings (Optional[chromadb.config.Settings]): Chroma client settings - collection_metadata (Optional[Dict]): Collection configurations. - Defaults to None. - - Returns: - Chroma: Chroma vectorstore. - """ - chroma_collection = cls( - collection_name=collection_name, - embedding_function=embedding, - persist_directory=persist_directory, - client_settings=client_settings, - client=client, - collection_metadata=collection_metadata, - **kwargs, - ) - if ids is None: - ids = [str(uuid.uuid1()) for _ in texts] - if hasattr( - chroma_collection._client, "max_batch_size" - ): # for Chroma 0.4.10 and above - from chromadb.utils.batch_utils import create_batches - - for batch in create_batches( - api=chroma_collection._client, - ids=ids, - metadatas=metadatas, - documents=texts, - ): - chroma_collection.add_texts( - texts=batch[3] if batch[3] else [], - metadatas=batch[2] if batch[2] else None, - ids=batch[0], - ) - else: - chroma_collection.add_texts(texts=texts, metadatas=metadatas, ids=ids) - return chroma_collection - - @classmethod - def from_documents( - cls: Type[Chroma], - documents: List[Document], - embedding: Optional[Embeddings] = None, - ids: Optional[List[str]] = None, - collection_name: str = _LANGCHAIN_DEFAULT_COLLECTION_NAME, - persist_directory: Optional[str] = None, - client_settings: Optional[chromadb.config.Settings] = None, - client: Optional[chromadb.Client] = None, # Add this line - collection_metadata: Optional[Dict] = None, - **kwargs: Any, - ) -> Chroma: - """Create a Chroma vectorstore from a list of documents. - - If a persist_directory is specified, the collection will be persisted there. - Otherwise, the data will be ephemeral in-memory. - - Args: - collection_name (str): Name of the collection to create. - persist_directory (Optional[str]): Directory to persist the collection. - ids (Optional[List[str]]): List of document IDs. Defaults to None. - documents (List[Document]): List of documents to add to the vectorstore. - embedding (Optional[Embeddings]): Embedding function. Defaults to None. - client_settings (Optional[chromadb.config.Settings]): Chroma client settings - collection_metadata (Optional[Dict]): Collection configurations. - Defaults to None. - - Returns: - Chroma: Chroma vectorstore. - """ - texts = [doc.page_content for doc in documents] - metadatas = [doc.metadata for doc in documents] - return cls.from_texts( - texts=texts, - embedding=embedding, - metadatas=metadatas, - ids=ids, - collection_name=collection_name, - persist_directory=persist_directory, - client_settings=client_settings, - client=client, - collection_metadata=collection_metadata, - **kwargs, - ) - - def delete(self, ids: Optional[List[str]] = None, **kwargs: Any) -> None: - """Delete by vector IDs. - - Args: - ids: List of ids to delete. - """ - self._collection.delete(ids=ids) From 3d1614d3ccb9b15a9456cd96a00b58cddf14c244 Mon Sep 17 00:00:00 2001 From: Sashin Date: Tue, 5 Dec 2023 22:33:45 +0200 Subject: [PATCH 32/32] implementing chroma --- swarms/memory/chroma.py | 753 ---------------------------------------- 1 file changed, 753 deletions(-) diff --git a/swarms/memory/chroma.py b/swarms/memory/chroma.py index 79b92964..e69de29b 100644 --- a/swarms/memory/chroma.py +++ b/swarms/memory/chroma.py @@ -1,753 +0,0 @@ -from __future__ import annotations - -import logging -import uuid -from typing import ( - TYPE_CHECKING, - Any, - Callable, - Dict, - Iterable, - List, - Optional, - Tuple, - Type, -) - -import numpy as np - -from swarms.structs.document import Document -from swarms.models.embeddings_base import Embeddings -from langchain.schema.vectorstore import VectorStore -from langchain.utils import xor_args -from langchain.vectorstores.utils import maximal_marginal_relevance - -if TYPE_CHECKING: - import chromadb - import chromadb.config - from chromadb.api.types import ID, OneOrMany, Where, WhereDocument - -logger = logging.getLogger() -DEFAULT_K = 4 # Number of Documents to return. - - -def _results_to_docs(results: Any) -> List[Document]: - return [doc for doc, _ in _results_to_docs_and_scores(results)] - - -def _results_to_docs_and_scores( - results: Any, -) -> List[Tuple[Document, float]]: - return [ - # TODO: Chroma can do batch querying, - # we shouldn't hard code to the 1st result - ( - Document( - page_content=result[0], metadata=result[1] or {} - ), - result[2], - ) - for result in zip( - results["documents"][0], - results["metadatas"][0], - results["distances"][0], - ) - ] - - -class Chroma(VectorStore): - """`ChromaDB` vector store. - - To use, you should have the ``chromadb`` python package installed. - - Example: - .. code-block:: python - - from langchain.vectorstores import Chroma - from langchain.embeddings.openai import OpenAIEmbeddings - - embeddings = OpenAIEmbeddings() - vectorstore = Chroma("langchain_store", embeddings) - """ - - _LANGCHAIN_DEFAULT_COLLECTION_NAME = "langchain" - - def __init__( - self, - collection_name: str = _LANGCHAIN_DEFAULT_COLLECTION_NAME, - embedding_function: Optional[Embeddings] = None, - persist_directory: Optional[str] = None, - client_settings: Optional[chromadb.config.Settings] = None, - collection_metadata: Optional[Dict] = None, - client: Optional[chromadb.Client] = None, - relevance_score_fn: Optional[Callable[[float], float]] = None, - ) -> None: - """Initialize with a Chroma client.""" - try: - import chromadb - import chromadb.config - except ImportError: - raise ImportError( - "Could not import chromadb python package. " - "Please install it with `pip install chromadb`." - ) - - if client is not None: - self._client_settings = client_settings - self._client = client - self._persist_directory = persist_directory - else: - if client_settings: - # If client_settings is provided with persist_directory specified, - # then it is "in-memory and persisting to disk" mode. - client_settings.persist_directory = ( - persist_directory - or client_settings.persist_directory - ) - if client_settings.persist_directory is not None: - # Maintain backwards compatibility with chromadb < 0.4.0 - major, minor, _ = chromadb.__version__.split(".") - if int(major) == 0 and int(minor) < 4: - client_settings.chroma_db_impl = ( - "duckdb+parquet" - ) - - _client_settings = client_settings - elif persist_directory: - # Maintain backwards compatibility with chromadb < 0.4.0 - major, minor, _ = chromadb.__version__.split(".") - if int(major) == 0 and int(minor) < 4: - _client_settings = chromadb.config.Settings( - chroma_db_impl="duckdb+parquet", - ) - else: - _client_settings = chromadb.config.Settings( - is_persistent=True - ) - _client_settings.persist_directory = persist_directory - else: - _client_settings = chromadb.config.Settings() - self._client_settings = _client_settings - self._client = chromadb.Client(_client_settings) - self._persist_directory = ( - _client_settings.persist_directory - or persist_directory - ) - - self._embedding_function = embedding_function - self._collection = self._client.get_or_create_collection( - name=collection_name, - embedding_function=( - self._embedding_function.embed_documents - if self._embedding_function is not None - else None - ), - metadata=collection_metadata, - ) - self.override_relevance_score_fn = relevance_score_fn - - @property - def embeddings(self) -> Optional[Embeddings]: - return self._embedding_function - - @xor_args(("query_texts", "query_embeddings")) - def __query_collection( - self, - query_texts: Optional[List[str]] = None, - query_embeddings: Optional[List[List[float]]] = None, - n_results: int = 4, - where: Optional[Dict[str, str]] = None, - where_document: Optional[Dict[str, str]] = None, - **kwargs: Any, - ) -> List[Document]: - """Query the chroma collection.""" - try: - import chromadb # noqa: F401 - except ImportError: - raise ValueError( - "Could not import chromadb python package. " - "Please install it with `pip install chromadb`." - ) - return self._collection.query( - query_texts=query_texts, - query_embeddings=query_embeddings, - n_results=n_results, - where=where, - where_document=where_document, - **kwargs, - ) - - def add_texts( - self, - texts: Iterable[str], - metadatas: Optional[List[dict]] = None, - ids: Optional[List[str]] = None, - **kwargs: Any, - ) -> List[str]: - """Run more texts through the embeddings and add to the vectorstore. - - Args: - texts (Iterable[str]): Texts to add to the vectorstore. - metadatas (Optional[List[dict]], optional): Optional list of metadatas. - ids (Optional[List[str]], optional): Optional list of IDs. - - Returns: - List[str]: List of IDs of the added texts. - """ - # TODO: Handle the case where the user doesn't provide ids on the Collection - if ids is None: - ids = [str(uuid.uuid1()) for _ in texts] - embeddings = None - texts = list(texts) - if self._embedding_function is not None: - embeddings = self._embedding_function.embed_documents( - texts - ) - if metadatas: - # fill metadatas with empty dicts if somebody - # did not specify metadata for all texts - length_diff = len(texts) - len(metadatas) - if length_diff: - metadatas = metadatas + [{}] * length_diff - empty_ids = [] - non_empty_ids = [] - for idx, m in enumerate(metadatas): - if m: - non_empty_ids.append(idx) - else: - empty_ids.append(idx) - if non_empty_ids: - metadatas = [metadatas[idx] for idx in non_empty_ids] - texts_with_metadatas = [ - texts[idx] for idx in non_empty_ids - ] - embeddings_with_metadatas = ( - [embeddings[idx] for idx in non_empty_ids] - if embeddings - else None - ) - ids_with_metadata = [ - ids[idx] for idx in non_empty_ids - ] - try: - self._collection.upsert( - metadatas=metadatas, - embeddings=embeddings_with_metadatas, - documents=texts_with_metadatas, - ids=ids_with_metadata, - ) - except ValueError as e: - if "Expected metadata value to be" in str(e): - msg = ( - "Try filtering complex metadata from the" - " document" - " using " - "langchain.vectorstores.utils.filter_complex_metadata." - ) - raise ValueError(e.args[0] + "\n\n" + msg) - else: - raise e - if empty_ids: - texts_without_metadatas = [ - texts[j] for j in empty_ids - ] - embeddings_without_metadatas = ( - [embeddings[j] for j in empty_ids] - if embeddings - else None - ) - ids_without_metadatas = [ids[j] for j in empty_ids] - self._collection.upsert( - embeddings=embeddings_without_metadatas, - documents=texts_without_metadatas, - ids=ids_without_metadatas, - ) - else: - self._collection.upsert( - embeddings=embeddings, - documents=texts, - ids=ids, - ) - return ids - - def similarity_search( - self, - query: str, - k: int = DEFAULT_K, - filter: Optional[Dict[str, str]] = None, - **kwargs: Any, - ) -> List[Document]: - """Run similarity search with Chroma. - - Args: - query (str): Query text to search for. - k (int): Number of results to return. Defaults to 4. - filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None. - - Returns: - List[Document]: List of documents most similar to the query text. - """ - docs_and_scores = self.similarity_search_with_score( - query, k, filter=filter - ) - return [doc for doc, _ in docs_and_scores] - - def similarity_search_by_vector( - self, - embedding: List[float], - k: int = DEFAULT_K, - filter: Optional[Dict[str, str]] = None, - where_document: Optional[Dict[str, str]] = None, - **kwargs: Any, - ) -> List[Document]: - """Return docs most similar to embedding vector. - Args: - embedding (List[float]): Embedding to look up documents similar to. - k (int): Number of Documents to return. Defaults to 4. - filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None. - Returns: - List of Documents most similar to the query vector. - """ - results = self.__query_collection( - query_embeddings=embedding, - n_results=k, - where=filter, - where_document=where_document, - ) - return _results_to_docs(results) - - def similarity_search_by_vector_with_relevance_scores( - self, - embedding: List[float], - k: int = DEFAULT_K, - filter: Optional[Dict[str, str]] = None, - where_document: Optional[Dict[str, str]] = None, - **kwargs: Any, - ) -> List[Tuple[Document, float]]: - """ - Return docs most similar to embedding vector and similarity score. - - Args: - embedding (List[float]): Embedding to look up documents similar to. - k (int): Number of Documents to return. Defaults to 4. - filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None. - - Returns: - List[Tuple[Document, float]]: List of documents most similar to - the query text and cosine distance in float for each. - Lower score represents more similarity. - """ - results = self.__query_collection( - query_embeddings=embedding, - n_results=k, - where=filter, - where_document=where_document, - ) - return _results_to_docs_and_scores(results) - - def similarity_search_with_score( - self, - query: str, - k: int = DEFAULT_K, - filter: Optional[Dict[str, str]] = None, - where_document: Optional[Dict[str, str]] = None, - **kwargs: Any, - ) -> List[Tuple[Document, float]]: - """Run similarity search with Chroma with distance. - - Args: - query (str): Query text to search for. - k (int): Number of results to return. Defaults to 4. - filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None. - - Returns: - List[Tuple[Document, float]]: List of documents most similar to - the query text and cosine distance in float for each. - Lower score represents more similarity. - """ - if self._embedding_function is None: - results = self.__query_collection( - query_texts=[query], - n_results=k, - where=filter, - where_document=where_document, - ) - else: - query_embedding = self._embedding_function.embed_query( - query - ) - results = self.__query_collection( - query_embeddings=[query_embedding], - n_results=k, - where=filter, - where_document=where_document, - ) - - return _results_to_docs_and_scores(results) - - def _select_relevance_score_fn(self) -> Callable[[float], float]: - """ - The 'correct' relevance function - may differ depending on a few things, including: - - the distance / similarity metric used by the VectorStore - - the scale of your embeddings (OpenAI's are unit normed. Many others are not!) - - embedding dimensionality - - etc. - """ - if self.override_relevance_score_fn: - return self.override_relevance_score_fn - - distance = "l2" - distance_key = "hnsw:space" - metadata = self._collection.metadata - - if metadata and distance_key in metadata: - distance = metadata[distance_key] - - if distance == "cosine": - return self._cosine_relevance_score_fn - elif distance == "l2": - return self._euclidean_relevance_score_fn - elif distance == "ip": - return self._max_inner_product_relevance_score_fn - else: - raise ValueError( - "No supported normalization function for distance" - f" metric of type: {distance}.Consider providing" - " relevance_score_fn to Chroma constructor." - ) - - def max_marginal_relevance_search_by_vector( - self, - embedding: List[float], - k: int = DEFAULT_K, - fetch_k: int = 20, - lambda_mult: float = 0.5, - filter: Optional[Dict[str, str]] = None, - where_document: Optional[Dict[str, str]] = None, - **kwargs: Any, - ) -> List[Document]: - """Return docs selected using the maximal marginal relevance. - Maximal marginal relevance optimizes for similarity to query AND diversity - among selected documents. - - Args: - embedding: Embedding to look up documents similar to. - k: Number of Documents to return. Defaults to 4. - fetch_k: Number of Documents to fetch to pass to MMR algorithm. - lambda_mult: Number between 0 and 1 that determines the degree - of diversity among the results with 0 corresponding - to maximum diversity and 1 to minimum diversity. - Defaults to 0.5. - filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None. - - Returns: - List of Documents selected by maximal marginal relevance. - """ - - results = self.__query_collection( - query_embeddings=embedding, - n_results=fetch_k, - where=filter, - where_document=where_document, - include=[ - "metadatas", - "documents", - "distances", - "embeddings", - ], - ) - mmr_selected = maximal_marginal_relevance( - np.array(embedding, dtype=np.float32), - results["embeddings"][0], - k=k, - lambda_mult=lambda_mult, - ) - - candidates = _results_to_docs(results) - - selected_results = [ - r for i, r in enumerate(candidates) if i in mmr_selected - ] - return selected_results - - def max_marginal_relevance_search( - self, - query: str, - k: int = DEFAULT_K, - fetch_k: int = 20, - lambda_mult: float = 0.5, - filter: Optional[Dict[str, str]] = None, - where_document: Optional[Dict[str, str]] = None, - **kwargs: Any, - ) -> List[Document]: - """Return docs selected using the maximal marginal relevance. - Maximal marginal relevance optimizes for similarity to query AND diversity - among selected documents. - - Args: - query: Text to look up documents similar to. - k: Number of Documents to return. Defaults to 4. - fetch_k: Number of Documents to fetch to pass to MMR algorithm. - lambda_mult: Number between 0 and 1 that determines the degree - of diversity among the results with 0 corresponding - to maximum diversity and 1 to minimum diversity. - Defaults to 0.5. - filter (Optional[Dict[str, str]]): Filter by metadata. Defaults to None. - - Returns: - List of Documents selected by maximal marginal relevance. - """ - if self._embedding_function is None: - raise ValueError( - "For MMR search, you must specify an embedding" - " function oncreation." - ) - - embedding = self._embedding_function.embed_query(query) - docs = self.max_marginal_relevance_search_by_vector( - embedding, - k, - fetch_k, - lambda_mult=lambda_mult, - filter=filter, - where_document=where_document, - ) - return docs - - def delete_collection(self) -> None: - """Delete the collection.""" - self._client.delete_collection(self._collection.name) - - def get( - self, - ids: Optional[OneOrMany[ID]] = None, - where: Optional[Where] = None, - limit: Optional[int] = None, - offset: Optional[int] = None, - where_document: Optional[WhereDocument] = None, - include: Optional[List[str]] = None, - ) -> Dict[str, Any]: - """Gets the collection. - - Args: - ids: The ids of the embeddings to get. Optional. - where: A Where type dict used to filter results by. - E.g. `{"color" : "red", "price": 4.20}`. Optional. - limit: The number of documents to return. Optional. - offset: The offset to start returning results from. - Useful for paging results with limit. Optional. - where_document: A WhereDocument type dict used to filter by the documents. - E.g. `{$contains: "hello"}`. Optional. - include: A list of what to include in the results. - Can contain `"embeddings"`, `"metadatas"`, `"documents"`. - Ids are always included. - Defaults to `["metadatas", "documents"]`. Optional. - """ - kwargs = { - "ids": ids, - "where": where, - "limit": limit, - "offset": offset, - "where_document": where_document, - } - - if include is not None: - kwargs["include"] = include - - return self._collection.get(**kwargs) - - def persist(self) -> None: - """Persist the collection. - - This can be used to explicitly persist the data to disk. - It will also be called automatically when the object is destroyed. - """ - if self._persist_directory is None: - raise ValueError( - "You must specify a persist_directory on" - "creation to persist the collection." - ) - import chromadb - - # Maintain backwards compatibility with chromadb < 0.4.0 - major, minor, _ = chromadb.__version__.split(".") - if int(major) == 0 and int(minor) < 4: - self._client.persist() - - def update_document( - self, document_id: str, document: Document - ) -> None: - """Update a document in the collection. - - Args: - document_id (str): ID of the document to update. - document (Document): Document to update. - """ - return self.update_documents([document_id], [document]) - - def update_documents( - self, ids: List[str], documents: List[Document] - ) -> None: - """Update a document in the collection. - - Args: - ids (List[str]): List of ids of the document to update. - documents (List[Document]): List of documents to update. - """ - text = [document.page_content for document in documents] - metadata = [document.metadata for document in documents] - if self._embedding_function is None: - raise ValueError( - "For update, you must specify an embedding function" - " on creation." - ) - embeddings = self._embedding_function.embed_documents(text) - - if hasattr( - self._collection._client, "max_batch_size" - ): # for Chroma 0.4.10 and above - from chromadb.utils.batch_utils import create_batches - - for batch in create_batches( - api=self._collection._client, - ids=ids, - metadatas=metadata, - documents=text, - embeddings=embeddings, - ): - self._collection.update( - ids=batch[0], - embeddings=batch[1], - documents=batch[3], - metadatas=batch[2], - ) - else: - self._collection.update( - ids=ids, - embeddings=embeddings, - documents=text, - metadatas=metadata, - ) - - @classmethod - def from_texts( - cls: Type[Chroma], - texts: List[str], - embedding: Optional[Embeddings] = None, - metadatas: Optional[List[dict]] = None, - ids: Optional[List[str]] = None, - collection_name: str = _LANGCHAIN_DEFAULT_COLLECTION_NAME, - persist_directory: Optional[str] = None, - client_settings: Optional[chromadb.config.Settings] = None, - client: Optional[chromadb.Client] = None, - collection_metadata: Optional[Dict] = None, - **kwargs: Any, - ) -> Chroma: - """Create a Chroma vectorstore from a raw documents. - - If a persist_directory is specified, the collection will be persisted there. - Otherwise, the data will be ephemeral in-memory. - - Args: - texts (List[str]): List of texts to add to the collection. - collection_name (str): Name of the collection to create. - persist_directory (Optional[str]): Directory to persist the collection. - embedding (Optional[Embeddings]): Embedding function. Defaults to None. - metadatas (Optional[List[dict]]): List of metadatas. Defaults to None. - ids (Optional[List[str]]): List of document IDs. Defaults to None. - client_settings (Optional[chromadb.config.Settings]): Chroma client settings - collection_metadata (Optional[Dict]): Collection configurations. - Defaults to None. - - Returns: - Chroma: Chroma vectorstore. - """ - chroma_collection = cls( - collection_name=collection_name, - embedding_function=embedding, - persist_directory=persist_directory, - client_settings=client_settings, - client=client, - collection_metadata=collection_metadata, - **kwargs, - ) - if ids is None: - ids = [str(uuid.uuid1()) for _ in texts] - if hasattr( - chroma_collection._client, "max_batch_size" - ): # for Chroma 0.4.10 and above - from chromadb.utils.batch_utils import create_batches - - for batch in create_batches( - api=chroma_collection._client, - ids=ids, - metadatas=metadatas, - documents=texts, - ): - chroma_collection.add_texts( - texts=batch[3] if batch[3] else [], - metadatas=batch[2] if batch[2] else None, - ids=batch[0], - ) - else: - chroma_collection.add_texts( - texts=texts, metadatas=metadatas, ids=ids - ) - return chroma_collection - - @classmethod - def from_documents( - cls: Type[Chroma], - documents: List[Document], - embedding: Optional[Embeddings] = None, - ids: Optional[List[str]] = None, - collection_name: str = _LANGCHAIN_DEFAULT_COLLECTION_NAME, - persist_directory: Optional[str] = None, - client_settings: Optional[chromadb.config.Settings] = None, - client: Optional[chromadb.Client] = None, # Add this line - collection_metadata: Optional[Dict] = None, - **kwargs: Any, - ) -> Chroma: - """Create a Chroma vectorstore from a list of documents. - - If a persist_directory is specified, the collection will be persisted there. - Otherwise, the data will be ephemeral in-memory. - - Args: - collection_name (str): Name of the collection to create. - persist_directory (Optional[str]): Directory to persist the collection. - ids (Optional[List[str]]): List of document IDs. Defaults to None. - documents (List[Document]): List of documents to add to the vectorstore. - embedding (Optional[Embeddings]): Embedding function. Defaults to None. - client_settings (Optional[chromadb.config.Settings]): Chroma client settings - collection_metadata (Optional[Dict]): Collection configurations. - Defaults to None. - - Returns: - Chroma: Chroma vectorstore. - """ - texts = [doc.page_content for doc in documents] - metadatas = [doc.metadata for doc in documents] - return cls.from_texts( - texts=texts, - embedding=embedding, - metadatas=metadatas, - ids=ids, - collection_name=collection_name, - persist_directory=persist_directory, - client_settings=client_settings, - client=client, - collection_metadata=collection_metadata, - **kwargs, - ) - - def delete( - self, ids: Optional[List[str]] = None, **kwargs: Any - ) -> None: - """Delete by vector IDs. - - Args: - ids: List of ids to delete. - """ - self._collection.delete(ids=ids)