Yeah we formatted it

pull/388/head
Wyatt Stanke 11 months ago
parent 89a35a27bc
commit e851966295
No known key found for this signature in database
GPG Key ID: CE6BA5FFF135536D

@ -0,0 +1,11 @@
[flake8]
max-line-length = 127
extend-ignore = E203
per-file-ignores =
# Most of this is just long strings
./swarms/prompts/**.py: E501 W293 W291
./swarms/__init__.py: F401
exclude =
./playground
./tests
./scripts

@ -1,6 +1,6 @@
from swarms import Agent, OpenAIChat from swarms import Agent, OpenAIChat
## Initialize the workflow # Initialize the workflow
agent = Agent( agent = Agent(
llm=OpenAIChat(), llm=OpenAIChat(),
max_loops=1, max_loops=1,

@ -0,0 +1,10 @@
exclude = ["./playground", "./tests", "./scripts"]
line-length = 127
[lint]
ignore = ["E203"]
select = ["E", "F", "W"]
[lint.per-file-ignores]
"./swarms/prompts/**.py" = ["E501", "W291", "W293"]
"./swarms/__init__.py" = ["F401"]

@ -23,9 +23,12 @@ class TextArtifact(BaseArtifact):
Methods: Methods:
__add__(self, other: BaseArtifact) -> TextArtifact: Concatenates the text value of the artifact with another artifact. __add__(self, other: BaseArtifact) -> TextArtifact: Concatenates the text value of the artifact with another artifact.
__bool__(self) -> bool: Checks if the text value of the artifact is non-empty. __bool__(self) -> bool: Checks if the text value of the artifact is non-empty.
generate_embedding(self, driver: BaseEmbeddingModel) -> Optional[list[float]]: Generates the embedding of the text artifact using a given embedding model. generate_embedding(self, driver: BaseEmbeddingModel) -> Optional[list[float]]:
token_count(self, tokenizer: BaseTokenizer) -> int: Counts the number of tokens in the text artifact using a given tokenizer. Generates the embedding of the text artifact using a given embedding model.
to_bytes(self) -> bytes: Converts the text value of the artifact to bytes using the specified encoding and error handler. token_count(self, tokenizer: BaseTokenizer) -> int:
Counts the number of tokens in the text artifact using a given tokenizer.
to_bytes(self) -> bytes:
Converts the text value of the artifact to bytes using the specified encoding and error handler.
""" """
value: str value: str

@ -7,7 +7,8 @@ class InternalMemoryBase(ABC):
"""Abstract base class for internal memory of agents in the swarm.""" """Abstract base class for internal memory of agents in the swarm."""
def __init__(self, n_entries): def __init__(self, n_entries):
"""Initialize the internal memory. In the current architecture the memory always consists of a set of solutions or evaluations. """Initialize the internal memory.
In the current architecture the memory always consists of a set of solutions or evaluations.
During the operation, the agent should retrieve best solutions from its internal memory based on the score. During the operation, the agent should retrieve best solutions from its internal memory based on the score.
Moreover, the project is designed around LLMs for the proof of concepts, so we treat all entry content as a string. Moreover, the project is designed around LLMs for the proof of concepts, so we treat all entry content as a string.
@ -28,7 +29,8 @@ class InternalMemoryBase(ABC):
class DictInternalMemory(InternalMemoryBase): class DictInternalMemory(InternalMemoryBase):
def __init__(self, n_entries: int): def __init__(self, n_entries: int):
""" """
Initialize the internal memory. In the current architecture the memory always consists of a set of solutions or evaluations. Initialize the internal memory.
In the current architecture the memory always consists of a set of solutions or evaluations.
Simple key-value store for now. Simple key-value store for now.
Args: Args:

@ -16,13 +16,15 @@ class DictSharedMemory:
Methods: Methods:
__init__(self, file_loc: str = None) -> None: Initializes the shared memory. __init__(self, file_loc: str = None) -> None: Initializes the shared memory.
add_entry(self, score: float, agent_id: str, agent_cycle: int, entry: Any) -> bool: Adds an entry to the internal memory. add_entry(self, score: float, agent_id: str, agent_cycle: int, entry: Any) -> bool:
Adds an entry to the internal memory.
get_top_n(self, n: int) -> None: Gets the top n entries from the internal memory. get_top_n(self, n: int) -> None: Gets the top n entries from the internal memory.
write_to_file(self, data: Dict[str, Dict[str, Any]]) -> bool: Writes the internal memory to a file. write_to_file(self, data: Dict[str, Dict[str, Any]]) -> bool: Writes the internal memory to a file.
""" """
def __init__(self, file_loc: str = None) -> None: def __init__(self, file_loc: str = None) -> None:
"""Initialize the shared memory. In the current architecture the memory always consists of a set of solutions or evaluations. """Initialize the shared memory.
In the current architecture the memory always consists of a set of solutions or evaluations.
Moreover, the project is designed around LLMs for the proof of concepts, so we treat all entry content as a string. Moreover, the project is designed around LLMs for the proof of concepts, so we treat all entry content as a string.
""" """
if file_loc is not None: if file_loc is not None:

@ -153,7 +153,8 @@ class LangchainChromaVectorMemory:
query (str): The query to search for. query (str): The query to search for.
k (int): The number of results to return. k (int): The number of results to return.
type (str): The type of search to perform: "cos" or "mmr". type (str): The type of search to perform: "cos" or "mmr".
distance_threshold (float): The distance threshold to use for the search. Results with distance > distance_threshold will be dropped. distance_threshold (float):
The distance threshold to use for the search. Results with distance > distance_threshold will be dropped.
Returns: Returns:
list[str]: A list of the top k results. list[str]: A list of the top k results.

@ -24,13 +24,31 @@ class PineconeDB(AbstractVectorDatabase):
index (pinecone.Index, optional): The Pinecone index to use. Defaults to None. index (pinecone.Index, optional): The Pinecone index to use. Defaults to None.
Methods: Methods:
upsert_vector(vector: list[float], vector_id: Optional[str] = None, namespace: Optional[str] = None, meta: Optional[dict] = None, **kwargs) -> str: upsert_vector(
vector: list[float],
vector_id: Optional[str] = None,
namespace: Optional[str] = None,
meta: Optional[dict] = None,
**kwargs
) -> str:
Upserts a vector into the index. Upserts a vector into the index.
load_entry(vector_id: str, namespace: Optional[str] = None) -> Optional[BaseVectorStore.Entry]: load_entry(
vector_id: str,
namespace: Optional[str] = None
) -> Optional[BaseVectorStore.Entry]:
Loads a single vector from the index. Loads a single vector from the index.
load_entries(namespace: Optional[str] = None) -> list[BaseVectorStore.Entry]: load_entries(
namespace: Optional[str] = None
) -> list[BaseVectorStore.Entry]:
Loads all vectors from the index. Loads all vectors from the index.
query(query: str, count: Optional[int] = None, namespace: Optional[str] = None, include_vectors: bool = False, include_metadata=True, **kwargs) -> list[BaseVectorStore.QueryResult]: query(
query: str,
count: Optional[int] = None,
namespace: Optional[str] = None,
include_vectors: bool = False,
include_metadata=True,
**kwargs
) -> list[BaseVectorStore.QueryResult]:
Queries the index for vectors similar to the given query string. Queries the index for vectors similar to the given query string.
create_index(name: str, **kwargs) -> None: create_index(name: str, **kwargs) -> None:
Creates a new index. Creates a new index.

@ -14,7 +14,7 @@ from swarms.models.clipq import CLIPQ # noqa: E402
# from swarms.models.whisperx_model import WhisperX # noqa: E402 # from swarms.models.whisperx_model import WhisperX # noqa: E402
# from swarms.models.kosmos_two import Kosmos # noqa: E402 # from swarms.models.kosmos_two import Kosmos # noqa: E402
# from swarms.models.cog_agent import CogAgent # noqa: E402 # from swarms.models.cog_agent import CogAgent # noqa: E402
## Function calling models # Function calling models
from swarms.models.fire_function import ( from swarms.models.fire_function import (
FireFunctionCaller, FireFunctionCaller,
) )
@ -59,7 +59,7 @@ from swarms.models.timm import TimmModel # noqa: E402
# ) # noqa: E402 # ) # noqa: E402
from swarms.models.together import TogetherLLM # noqa: E402 from swarms.models.together import TogetherLLM # noqa: E402
############## Types # Types
from swarms.models.types import ( # noqa: E402 from swarms.models.types import ( # noqa: E402
AudioModality, AudioModality,
ImageModality, ImageModality,

@ -40,14 +40,30 @@ class BaseMultiModalModel:
Examples: Examples:
>>> from swarms.models.base_multimodal_model import BaseMultiModalModel >>> from swarms.models.base_multimodal_model import BaseMultiModalModel
>>> model = BaseMultiModalModel() >>> model = BaseMultiModalModel()
>>> link = "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png"
>>> model.run("Generate a summary of this text") >>> model.run("Generate a summary of this text")
>>> model.run("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png") >>> model.run("Generate a summary of this text", link)
>>> model.run_batch(["Generate a summary of this text", "Generate a summary of this text"]) >>> model.run_batch(["Generate a summary of this text", "Generate a summary of this text"])
>>> model.run_batch([("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png"), ("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png")]) >>> model.run_batch([
>>> model.run_batch_async(["Generate a summary of this text", "Generate a summary of this text"]) ("Generate a summary of this text", link),
>>> model.run_batch_async([("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png"), ("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png")]) ("Generate a summary of this text", link)
>>> model.run_batch_async_with_retries(["Generate a summary of this text", "Generate a summary of this text"]) ])
>>> model.run_batch_async_with_retries([("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png"), ("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png")]) >>> model.run_batch_async([
"Generate a summary of this text",
"Generate a summary of this text"
])
>>> model.run_batch_async([
("Generate a summary of this text", link),
("Generate a summary of this text", link)
])
>>> model.run_batch_async_with_retries([
"Generate a summary of this text",
"Generate a summary of this text"
])
>>> model.run_batch_async_with_retries([
("Generate a summary of this text", link),
("Generate a summary of this text", link)
])
>>> model.generate_summary("Generate a summary of this text") >>> model.generate_summary("Generate a summary of this text")
>>> model.set_temperature(0.5) >>> model.set_temperature(0.5)
>>> model.set_max_tokens(500) >>> model.set_max_tokens(500)
@ -348,9 +364,9 @@ class BaseMultiModalModel:
_type_: _description_ _type_: _description_
""" """
META_PROMPT = """ META_PROMPT = """
For any labels or markings on an image that you reference in your response, please For any labels or markings on an image that you reference in your response, please
enclose them in square brackets ([]) and list them explicitly. Do not use ranges; for enclose them in square brackets ([]) and list them explicitly. Do not use ranges; for
example, instead of '1 - 4', list as '[1], [2], [3], [4]'. These labels could be example, instead of '1 - 4', list as '[1], [2], [3], [4]'. These labels could be
numbers or letters and typically correspond to specific segments or parts of the image. numbers or letters and typically correspond to specific segments or parts of the image.
""" """
return META_PROMPT return META_PROMPT

@ -2,7 +2,8 @@ r"""
BioGPT BioGPT
Pre-trained language models have attracted increasing attention in the biomedical domain, Pre-trained language models have attracted increasing attention in the biomedical domain,
inspired by their great success in the general natural language domain. inspired by their great success in the general natural language domain.
Among the two main branches of pre-trained language models in the general language domain, i.e. BERT (and its variants) and GPT (and its variants), Among the two main branches of pre-trained language models in the general language domain,
i.e. BERT (and its variants) and GPT (and its variants),
the first one has been extensively studied in the biomedical domain, such as BioBERT and PubMedBERT. the first one has been extensively studied in the biomedical domain, such as BioBERT and PubMedBERT.
While they have achieved great success on a variety of discriminative downstream biomedical tasks, While they have achieved great success on a variety of discriminative downstream biomedical tasks,
the lack of generation ability constrains their application scope. the lack of generation ability constrains their application scope.
@ -24,7 +25,6 @@ advantage of BioGPT on biomedical literature to generate fluent descriptions for
number = {6}, number = {6},
year = {2022}, year = {2022},
month = {09}, month = {09},
abstract = "{Pre-trained language models have attracted increasing attention in the biomedical domain, inspired by their great success in the general natural language domain. Among the two main branches of pre-trained language models in the general language domain, i.e. BERT (and its variants) and GPT (and its variants), the first one has been extensively studied in the biomedical domain, such as BioBERT and PubMedBERT. While they have achieved great success on a variety of discriminative downstream biomedical tasks, the lack of generation ability constrains their application scope. In this paper, we propose BioGPT, a domain-specific generative Transformer language model pre-trained on large-scale biomedical literature. We evaluate BioGPT on six biomedical natural language processing tasks and demonstrate that our model outperforms previous models on most tasks. Especially, we get 44.98\%, 38.42\% and 40.76\% F1 score on BC5CDR, KD-DTI and DDI end-to-end relation extraction tasks, respectively, and 78.2\% accuracy on PubMedQA, creating a new record. Our case study on text generation further demonstrates the advantage of BioGPT on biomedical literature to generate fluent descriptions for biomedical terms.}",
issn = {1477-4054}, issn = {1477-4054},
doi = {10.1093/bib/bbac409}, doi = {10.1093/bib/bbac409},
url = {https://doi.org/10.1093/bib/bbac409}, url = {https://doi.org/10.1093/bib/bbac409},

@ -161,7 +161,8 @@ class CogVLMMultiModal(BaseMultiModalModel):
Methods: Methods:
run: Generates a response using the CogVLM model. run: Generates a response using the CogVLM model.
generate_stream_cogvlm: Generates a stream of responses using the CogVLM model in inference mode. generate_stream_cogvlm: Generates a stream of responses using the CogVLM model in inference mode.
process_history_and_images: Processes history messages to extract text, identify the last user query, and convert base64 encoded image URLs to PIL images. process_history_and_images: Processes history messages to extract text, identify the last user query,
and convert base64 encoded image URLs to PIL images.
Example: Example:
>>> model = CogVLMMultiModal() >>> model = CogVLMMultiModal()

@ -257,7 +257,7 @@ class Dalle3:
"""Print the Dalle3 dashboard""" """Print the Dalle3 dashboard"""
print( print(
colored( colored(
f"""Dalle3 Dashboard: f"""Dalle3 Dashboard:
-------------------- --------------------
Model: {self.model} Model: {self.model}
@ -271,8 +271,8 @@ class Dalle3:
Save Folder: {self.save_folder} Save Folder: {self.save_folder}
Image Format: {self.image_format} Image Format: {self.image_format}
-------------------- --------------------
""", """,
"green", "green",
) )

@ -37,7 +37,8 @@ class ElevenLabsText2SpeechTool(BaseTool):
Defaults to ElevenLabsModel.MULTI_LINGUAL. Defaults to ElevenLabsModel.MULTI_LINGUAL.
name (str): The name of the tool. Defaults to "eleven_labs_text2speech". name (str): The name of the tool. Defaults to "eleven_labs_text2speech".
description (str): The description of the tool. description (str): The description of the tool.
Defaults to "A wrapper around Eleven Labs Text2Speech. Useful for when you need to convert text to speech. It supports multiple languages, including English, German, Polish, Spanish, Italian, French, Portuguese, and Hindi." Defaults to "A wrapper around Eleven Labs Text2Speech. Useful for when you need to convert text to speech.
It supports multiple languages, including English, German, Polish, Spanish, Italian, French, Portuguese, and Hindi."
Usage: Usage:

@ -132,13 +132,13 @@ class Gemini(BaseMultiModalModel):
system_prompt (str, optional): _description_. Defaults to None. system_prompt (str, optional): _description_. Defaults to None.
""" """
PROMPT = f""" PROMPT = f"""
{self.system_prompt} {self.system_prompt}
###### ######
{task} {task}
""" """
return PROMPT return PROMPT

@ -11,7 +11,8 @@ from swarms.utils.supervision_visualizer import MarkVisualizer
class GPT4VSAM(BaseMultiModalModel): class GPT4VSAM(BaseMultiModalModel):
""" """
GPT4VSAM class represents a multi-modal model that combines the capabilities of GPT-4 and SegmentAnythingMarkGenerator. GPT4VSAM class represents a multi-modal model that combines the capabilities of GPT-4 and SegmentAnythingMarkGenerator.
It takes an instance of BaseMultiModalModel (vlm) and a device as input and provides methods for loading images and making predictions. It takes an instance of BaseMultiModalModel (vlm)
and a device as input and provides methods for loading images and making predictions.
Args: Args:
vlm (BaseMultiModalModel): An instance of BaseMultiModalModel representing the visual language model. vlm (BaseMultiModalModel): An instance of BaseMultiModalModel representing the visual language model.

@ -203,8 +203,9 @@ class GPT4VisionAPI(BaseMultiModalModel):
""" """
PROMPT = f""" PROMPT = f"""
These are frames from a video that I want to upload. Generate a compelling description that I can upload along with the video: These are frames from a video that I want to upload.
Generate a compelling description that I can upload along with the video:
{frames} {frames}
""" """
return PROMPT return PROMPT

@ -63,7 +63,8 @@ class Idefics(BaseMultiModalModel):
response = model.chat(user_input) response = model.chat(user_input)
print(response) print(response)
user_input = "User: And who is that? https://static.wikia.nocookie.net/asterix/images/2/25/R22b.gif/revision/latest?cb=20110815073052" user_input = "User: And who is that? \
https://static.wikia.nocookie.net/asterix/images/2/25/R22b.gif/revision/latest?cb=20110815073052"
response = model.chat(user_input) response = model.chat(user_input)
print(response) print(response)

@ -26,7 +26,8 @@ class MedicalSAM:
Methods: Methods:
__post_init__(): Initializes the MedicalSAM object. __post_init__(): Initializes the MedicalSAM object.
download_model_weights(model_path: str): Downloads the model weights from the specified URL and saves them to the given file path. download_model_weights(model_path: str):
Downloads the model weights from the specified URL and saves them to the given file path.
preprocess(img): Preprocesses the input image. preprocess(img): Preprocesses the input image.
run(img, box): Runs the semantic segmentation on the input image within the specified bounding box. run(img, box): Runs the semantic segmentation on the input image within the specified bounding box.

@ -37,9 +37,6 @@ from tenacity import (
stop_after_attempt, stop_after_attempt,
wait_exponential, wait_exponential,
) )
logger = logging.getLogger(__name__)
from importlib.metadata import version from importlib.metadata import version
from packaging.version import parse from packaging.version import parse
@ -239,9 +236,9 @@ class BaseOpenAI(BaseLLM):
attributes["openai_api_base"] = self.openai_api_base attributes["openai_api_base"] = self.openai_api_base
if self.openai_organization != "": if self.openai_organization != "":
attributes[ attributes["openai_organization"] = (
"openai_organization" self.openai_organization
] = self.openai_organization )
if self.openai_proxy != "": if self.openai_proxy != "":
attributes["openai_proxy"] = self.openai_proxy attributes["openai_proxy"] = self.openai_proxy
@ -651,9 +648,10 @@ class BaseOpenAI(BaseLLM):
"organization": self.openai_organization, "organization": self.openai_organization,
} }
if self.openai_proxy: if self.openai_proxy:
import openai pass
# TODO: The 'openai.proxy' option isn't read in the client API. You will need to pass it when you instantiate the client, e.g. 'OpenAI(proxy={"http": self.openai_proxy, "https": self.openai_proxy})' # TODO: The 'openai.proxy' option isn't read in the client API. You will need to pass it when you instantiate the
# client, e.g. 'OpenAI(proxy={"http": self.openai_proxy, "https": self.openai_proxy})'
# openai.proxy = {"http": self.openai_proxy, "https": self.openai_proxy} # type: ignore[assignment] # noqa: E501 # openai.proxy = {"http": self.openai_proxy, "https": self.openai_proxy} # type: ignore[assignment] # noqa: E501
return {**openai_creds, **self._default_params} return {**openai_creds, **self._default_params}
@ -940,27 +938,27 @@ class OpenAIChat(BaseLLM):
@root_validator() @root_validator()
def validate_environment(cls, values: dict) -> dict: def validate_environment(cls, values: dict) -> dict:
"""Validate that api key and python package exists in environment.""" """Validate that api key and python package exists in environment."""
openai_api_key = get_from_dict_or_env( # openai_api_key = get_from_dict_or_env(
values, "openai_api_key", "OPENAI_API_KEY" # values, "openai_api_key", "OPENAI_API_KEY"
) # )
openai_api_base = get_from_dict_or_env( # openai_api_base = get_from_dict_or_env(
values, # values,
"openai_api_base", # "openai_api_base",
"OPENAI_API_BASE", # "OPENAI_API_BASE",
default="", # default="",
) # )
openai_proxy = get_from_dict_or_env( # openai_proxy = get_from_dict_or_env(
values, # values,
"openai_proxy", # "openai_proxy",
"OPENAI_PROXY", # "OPENAI_PROXY",
default="", # default="",
) # )
openai_organization = get_from_dict_or_env( # openai_organization = get_from_dict_or_env(
values, # values,
"openai_organization", # "openai_organization",
"OPENAI_ORGANIZATION", # "OPENAI_ORGANIZATION",
default="", # default="",
) # )
try: try:
import openai import openai
except ImportError: except ImportError:

@ -27,7 +27,8 @@ class SAM:
processor (SamProcessor): The processor for the SAM model. processor (SamProcessor): The processor for the SAM model.
Methods: Methods:
run(task=None, img=None, *args, **kwargs): Runs the SAM model on the given image and returns the segmentation scores and masks. run(task=None, img=None, *args, **kwargs):
Runs the SAM model on the given image and returns the segmentation scores and masks.
process_img(img: str = None, *args, **kwargs): Processes the input image and returns the processed image. process_img(img: str = None, *args, **kwargs): Processes the input image and returns the processed image.
""" """

@ -2,18 +2,29 @@
SpeechT5 (TTS task) SpeechT5 (TTS task)
SpeechT5 model fine-tuned for speech synthesis (text-to-speech) on LibriTTS. SpeechT5 model fine-tuned for speech synthesis (text-to-speech) on LibriTTS.
This model was introduced in SpeechT5: Unified-Modal Encoder-Decoder Pre-Training for Spoken Language Processing by Junyi Ao, Rui Wang, Long Zhou, Chengyi Wang, Shuo Ren, Yu Wu, Shujie Liu, Tom Ko, Qing Li, Yu Zhang, Zhihua Wei, Yao Qian, Jinyu Li, Furu Wei. This model was introduced in SpeechT5: Unified-Modal Encoder-Decoder Pre-Training for Spoken Language Processing by
Junyi Ao, Rui Wang, Long Zhou, Chengyi Wang, Shuo Ren, Yu Wu,
Shujie Liu, Tom Ko, Qing Li, Yu Zhang, Zhihua Wei, Yao Qian, Jinyu Li, Furu Wei.
SpeechT5 was first released in this repository, original weights. The license used is MIT. SpeechT5 was first released in this repository, original weights. The license used is MIT.
Model Description Model Description
Motivated by the success of T5 (Text-To-Text Transfer Transformer) in pre-trained natural language processing models, we propose a unified-modal SpeechT5 framework that explores the encoder-decoder pre-training for self-supervised speech/text representation learning. The SpeechT5 framework consists of a shared encoder-decoder network and six modal-specific (speech/text) pre/post-nets. After preprocessing the input speech/text through the pre-nets, the shared encoder-decoder network models the sequence-to-sequence transformation, and then the post-nets generate the output in the speech/text modality based on the output of the decoder. Motivated by the success of T5 (Text-To-Text Transfer Transformer) in pre-trained natural language processing models,
we propose a unified-modal SpeechT5 framework that explores the encoder-decoder pre-training for self-supervised speech/text
representation learning. The SpeechT5 framework consists of a shared encoder-decoder network and six modal-specific
(speech/text) pre/post-nets. After preprocessing the input speech/text through the pre-nets, the shared encoder-decoder network
models the sequence-to-sequence transformation, and then the post-nets generate the output in the speech/text modality based on
the output of the decoder.
Leveraging large-scale unlabeled speech and text data, we pre-train SpeechT5 to learn a unified-modal representation,
hoping to improve the modeling capability for both speech and text. To align the textual and speech information into
this unified semantic space, we propose a cross-modal vector quantization approach that randomly mixes up speech/text
states with latent units as the interface between encoder and decoder.
Extensive evaluations show the superiority of the proposed SpeechT5 framework on a wide variety of spoken language processing
tasks, including automatic speech recognition, speech synthesis, speech translation, voice conversion, speech enhancement,
and speaker identification.
Leveraging large-scale unlabeled speech and text data, we pre-train SpeechT5 to learn a unified-modal representation, hoping to improve the modeling capability for both speech and text. To align the textual and speech information into this unified semantic space, we propose a cross-modal vector quantization approach that randomly mixes up speech/text states with latent units as the interface between encoder and decoder.
Extensive evaluations show the superiority of the proposed SpeechT5 framework on a wide variety of spoken language processing tasks, including automatic speech recognition, speech synthesis, speech translation, voice conversion, speech enhancement, and speaker identification.
Developed by: Junyi Ao, Rui Wang, Long Zhou, Chengyi Wang, Shuo Ren, Yu Wu, Shujie Liu, Tom Ko, Qing Li, Yu Zhang, Zhihua Wei, Yao Qian, Jinyu Li, Furu Wei.
Shared by [optional]: Matthijs Hollemans Shared by [optional]: Matthijs Hollemans
Model type: text-to-speech Model type: text-to-speech
Language(s) (NLP): [More Information Needed] Language(s) (NLP): [More Information Needed]

@ -171,7 +171,7 @@ class SSD1B:
"""Print the SSD1B dashboard""" """Print the SSD1B dashboard"""
print( print(
colored( colored(
f"""SSD1B Dashboard: f"""SSD1B Dashboard:
-------------------- --------------------
Model: {self.model} Model: {self.model}
@ -185,8 +185,8 @@ class SSD1B:
Save Folder: {self.save_folder} Save Folder: {self.save_folder}
Image Format: {self.image_format} Image Format: {self.image_format}
-------------------- --------------------
""", """,
"green", "green",
) )

@ -34,7 +34,7 @@ commands: {
""" """
########### FEW SHOT EXAMPLES ################ # FEW SHOT EXAMPLES #
SCENARIOS = """ SCENARIOS = """
commands: { commands: {
"tools": { "tools": {

@ -62,6 +62,6 @@ def worker_tools_sop_promp(name: str, memory: str, time=time):
[{memory}] [{memory}]
Human: Determine which next command to use, and respond using the format specified above: Human: Determine which next command to use, and respond using the format specified above:
""".format(name=name, time=time, memory=memory) """.format(name=name, time=time, memory=memory) # noqa: F521
return str(out) return str(out)

@ -447,7 +447,7 @@ class Agent:
Name: {self.agent_name} Name: {self.agent_name}
Description: {self.agent_description} Description: {self.agent_description}
Standard Operating Procedure: {self.sop} Standard Operating Procedure: {self.sop}
System Prompt: {self.system_prompt} System Prompt: {self.system_prompt}
Task: {task} Task: {task}
Max Loops: {self.max_loops} Max Loops: {self.max_loops}
Stopping Condition: {self.stopping_condition} Stopping Condition: {self.stopping_condition}
@ -778,7 +778,7 @@ class Agent:
Follow this standard operating procedure (SOP) to complete tasks: Follow this standard operating procedure (SOP) to complete tasks:
{self.sop} {self.sop}
{history} {history}
""" """
return agent_history_prompt return agent_history_prompt
@ -786,7 +786,7 @@ class Agent:
system_prompt = self.system_prompt system_prompt = self.system_prompt
agent_history_prompt = f""" agent_history_prompt = f"""
System : {system_prompt} System : {system_prompt}
{history} {history}
""" """
return agent_history_prompt return agent_history_prompt

@ -96,7 +96,8 @@ class AsyncWorkflow:
# if self.dashboard: # if self.dashboard:
# self.display() # self.display()
# Add a stopping condition to stop the workflow, if provided but stopping_condition takes in a parameter s for string # Add a stopping condition to stop the workflow,
# if provided but stopping_condition takes in a parameter s for string
if self.stopping_condition: if self.stopping_condition:
if self.stopping_condition(self.results): if self.stopping_condition(self.results):
break break

@ -86,10 +86,6 @@ class AbstractSwarm(ABC):
def step(self): def step(self):
"""Step the swarm""" """Step the swarm"""
# @abstractmethod
def add_agent(self, agent: "Agent"):
"""Add an agent to the swarm"""
# @abstractmethod # @abstractmethod
def remove_agent(self, agent: "Agent"): def remove_agent(self, agent: "Agent"):
"""Remove an agent from the swarm""" """Remove an agent from the swarm"""

@ -181,7 +181,16 @@ class BaseWorkflow(BaseStructure):
>>> workflow.add("Create a report on these metrics", llm) >>> workflow.add("Create a report on these metrics", llm)
>>> workflow.delete_task("What's the weather in miami") >>> workflow.delete_task("What's the weather in miami")
>>> workflow.tasks >>> workflow.tasks
[Task(description='Create a report on these metrics', agent=Agent(llm=OpenAIChat(openai_api_key=''), max_loops=1, dashboard=False), args=[], kwargs={}, result=None, history=[])] [
Task(
description='Create a report on these metrics',
agent=Agent(llm=OpenAIChat(openai_api_key=''), max_loops=1, dashboard=False),
args=[],
kwargs={},
result=None,
history=[]
)
]
""" """
try: try:
for task in self.tasks: for task in self.tasks:

@ -15,7 +15,8 @@ class ConcurrentWorkflow(BaseStructure):
Args: Args:
max_workers (int): The maximum number of workers to use for the ThreadPoolExecutor. max_workers (int): The maximum number of workers to use for the ThreadPoolExecutor.
autosave (bool): Whether to save the state of the workflow to a file. Default is False. autosave (bool): Whether to save the state of the workflow to a file. Default is False.
saved_state_filepath (str): The filepath to save the state of the workflow to. Default is "runs/concurrent_workflow.json". saved_state_filepath (str):
The filepath to save the state of the workflow to. Default is "runs/concurrent_workflow.json".
print_results (bool): Whether to print the results of each task. Default is False. print_results (bool): Whether to print the results of each task. Default is False.
return_results (bool): Whether to return the results of each task. Default is False. return_results (bool): Whether to return the results of each task. Default is False.
use_processes (bool): Whether to use processes instead of threads. Default is False. use_processes (bool): Whether to use processes instead of threads. Default is False.

@ -19,7 +19,8 @@ class DebatePlayer(Agent):
Args: Args:
model_name (str): model name model_name (str): model name
name (str): name of this player name (str): name of this player
temperature (float): higher values make the output more random, while lower values make it more focused and deterministic temperature (float):
higher values make the output more random, while lower values make it more focused and deterministic
openai_api_key (str): As the parameter name suggests openai_api_key (str): As the parameter name suggests
sleep_time (float): sleep because of rate limits sleep_time (float): sleep because of rate limits
""" """
@ -31,7 +32,8 @@ class Debate:
Args: Args:
model_name (str): openai model name model_name (str): openai model name
temperature (float): higher values make the output more random, while lower values make it more focused and deterministic temperature (float):
higher values make the output more random, while lower values make it more focused and deterministic
num_players (int): num of players num_players (int): num of players
save_file_dir (str): dir path to json file save_file_dir (str): dir path to json file
openai_api_key (str): As the parameter name suggests openai_api_key (str): As the parameter name suggests
@ -359,6 +361,13 @@ class Debate:
# with open(prompts_path, 'w') as file: # with open(prompts_path, 'w') as file:
# json.dump(config, file, ensure_ascii=False, indent=4) # json.dump(config, file, ensure_ascii=False, indent=4)
# debate = Debate(save_file_dir=save_file_dir, num_players=3, openai_api_key=openai_api_key, prompts_path=prompts_path, temperature=0, sleep_time=0) # debate = Debate(
# save_file_dir=save_file_dir,
# num_players=3,
# openai_api_key=openai_api_key,
# prompts_path=prompts_path,
# temperature=0,
# sleep_time=0
# )
# debate.run() # debate.run()
# debate.save_file_to_json(id) # debate.save_file_to_json(id)

@ -17,7 +17,8 @@ class GraphWorkflow(BaseStructure):
connect(from_node, to_node): Connects two nodes in the graph. connect(from_node, to_node): Connects two nodes in the graph.
set_entry_point(node_name): Sets the entry point node for the workflow. set_entry_point(node_name): Sets the entry point node for the workflow.
add_edge(from_node, to_node): Adds an edge between two nodes in the graph. add_edge(from_node, to_node): Adds an edge between two nodes in the graph.
add_conditional_edges(from_node, condition, edge_dict): Adds conditional edges from a node to multiple nodes based on a condition. add_conditional_edges(from_node, condition, edge_dict):
Adds conditional edges from a node to multiple nodes based on a condition.
run(): Runs the workflow and returns the graph. run(): Runs the workflow and returns the graph.
Examples: Examples:

@ -51,23 +51,23 @@ class LongContextSwarmLeader:
- prompt (str): The formatted string containing the agent metadata. - prompt (str): The formatted string containing the agent metadata.
""" """
prompt = f""" prompt = f"""
You need to recruit a team of members to solve a You need to recruit a team of members to solve a
task. Select the appropriate member based on the task. Select the appropriate member based on the
task description: task description:
# Task Description # Task Description
{task} {task}
# Members # Members
Your output must follow this JSON schema below in markdown format: Your output must follow this JSON schema below in markdown format:
{{ {{
"agent_id": "string", "agent_id": "string",
"agent_name": "string", "agent_name": "string",
"agent_description": "string" "agent_description": "string"
}} }}
""" """
for agent in self.agents: for agent in self.agents:
prompt += ( prompt += (
@ -83,7 +83,7 @@ class LongContextSwarmLeader:
You are the leader of a team of {len(self.agents)} You are the leader of a team of {len(self.agents)}
members. Your team will need to collaborate to members. Your team will need to collaborate to
solve a task. The rule is: solve a task. The rule is:
1. Only you know the task description and task 1. Only you know the task description and task
objective; the other members do not. objective; the other members do not.
2. But they will receive different documents that 2. But they will receive different documents that
@ -95,13 +95,13 @@ class LongContextSwarmLeader:
explicitly include the task objective. explicitly include the task objective.
4. Finally, you need to complete the task based on 4. Finally, you need to complete the task based on
the query results they return. the query results they return.
# Task Description: # Task Description:
{task_description} {task_description}
# Task Objective: # Task Objective:
{task} {task}
# Generate Instruction for Members: # Generate Instruction for Members:
Now, you need to generate an instruction for all Now, you need to generate an instruction for all
team members. You can ask them to answer a team members. You can ask them to answer a
@ -110,7 +110,7 @@ class LongContextSwarmLeader:
Your output must following the JSON Your output must following the JSON
format: {{"type": "instruction", "content": format: {{"type": "instruction", "content":
"your_instruction_content"}} "your_instruction_content"}}
""" """
return prompt return prompt

@ -129,7 +129,8 @@ class MajorityVoting:
multithreaded (bool, optional): Whether to run the agents using multithreading. Defaults to False. multithreaded (bool, optional): Whether to run the agents using multithreading. Defaults to False.
multiprocess (bool, optional): Whether to run the agents using multiprocessing. Defaults to False. multiprocess (bool, optional): Whether to run the agents using multiprocessing. Defaults to False.
asynchronous (bool, optional): Whether to run the agents asynchronously. Defaults to False. asynchronous (bool, optional): Whether to run the agents asynchronously. Defaults to False.
output_parser (callable, optional): A callable function to parse the output of the majority voting system. Defaults to None. output_parser (callable, optional): A callable function to parse the output
of the majority voting system. Defaults to None.
Examples: Examples:
>>> from swarms.structs.agent import Agent >>> from swarms.structs.agent import Agent

@ -68,11 +68,17 @@ class MessagePool(BaseSwarm):
>>> message_pool.add(agent=agent2, content="Hello, agent1!", turn=1) >>> message_pool.add(agent=agent2, content="Hello, agent1!", turn=1)
>>> message_pool.add(agent=agent3, content="Hello, agent1!", turn=1) >>> message_pool.add(agent=agent3, content="Hello, agent1!", turn=1)
>>> message_pool.get_all_messages() >>> message_pool.get_all_messages()
[{'agent': Agent(agent_name='agent1'), 'content': 'Hello, agent2!', 'turn': 1, 'visible_to': 'all', 'logged': True}, {'agent': Agent(agent_name='agent2'), 'content': 'Hello, agent1!', 'turn': 1, 'visible_to': 'all', 'logged': True}, {'agent': Agent(agent_name='agent3'), 'content': 'Hello, agent1!', 'turn': 1, 'visible_to': 'all', 'logged': True}] [{'agent': Agent(agent_name='agent1'), 'content': 'Hello, agent2!', 'turn': 1, 'visible_to': 'all', 'logged': True},
>>> message_pool.get_visible_messages(agent=agent1, turn=1) {'agent': Agent(agent_name='agent2'), 'content': 'Hello, agent1!', 'turn': 1, 'visible_to': 'all', 'logged': True},
[{'agent': Agent(agent_name='agent1'), 'content': 'Hello, agent2!', 'turn': 1, 'visible_to': 'all', 'logged': True}, {'agent': Agent(agent_name='agent2'), 'content': 'Hello, agent1!', 'turn': 1, 'visible_to': 'all', 'logged': True}, {'agent': Agent(agent_name='agent3'), 'content': 'Hello, agent1!', 'turn': 1, 'visible_to': 'all', 'logged': True}] {'agent': Agent(agent_name='agent3'), 'content': 'Hello, agent1!', 'turn': 1, 'visible_to': 'all', 'logged': True}]
>>> message_pool.get_visible_messages(agent=agent2, turn=1) >>> message_pool.get_visible_messages(agent=agent2, turn=1)
[{'agent': Agent(agent_name='agent1'), 'content': 'Hello, agent2!', 'turn': 1, 'visible_to': 'all', 'logged': True}, {'agent': Agent(agent_name='agent2'), 'content': 'Hello, agent1!', 'turn': 1, 'visible_to': 'all', 'logged': True}, {'agent': Agent(agent_name='agent3'), 'content': 'Hello, agent1!', 'turn': 1, 'visible_to': 'all', 'logged': True}] [{'agent': Agent(agent_name='agent1'), 'content': 'Hello, agent2!', 'turn': 1, 'visible_to': 'all', 'logged': True},
{'agent': Agent(agent_name='agent2'), 'content': 'Hello, agent1!', 'turn': 1, 'visible_to': 'all', 'logged': True},
{'agent': Agent(agent_name='agent3'), 'content': 'Hello, agent1!', 'turn': 1, 'visible_to': 'all', 'logged': True}]
>>> message_pool.get_visible_messages(agent=agent2, turn=1)
[{'agent': Agent(agent_name='agent1'), 'content': 'Hello, agent2!', 'turn': 1, 'visible_to': 'all', 'logged': True},
{'agent': Agent(agent_name='agent2'), 'content': 'Hello, agent1!', 'turn': 1, 'visible_to': 'all', 'logged': True},
{'agent': Agent(agent_name='agent3'), 'content': 'Hello, agent1!', 'turn': 1, 'visible_to': 'all', 'logged': True}]
""" """
def __init__( def __init__(

@ -12,7 +12,8 @@ def scrape_tool_func_docs(fn: Callable) -> str:
fn (Callable): The function to scrape. fn (Callable): The function to scrape.
Returns: Returns:
str: A string containing the function's name, documentation string, and a list of its parameters. Each parameter is represented as a line containing the parameter's name, default value, and annotation. str: A string containing the function's name, documentation string, and a list of its parameters.
Each parameter is represented as a line containing the parameter's name, default value, and annotation.
""" """
try: try:
# If the function is a tool, get the original function # If the function is a tool, get the original function

@ -85,7 +85,7 @@ def tools_prompt_prep(docs: str = None, scenarios: str = SCENARIOS):
You will be provided with a list of APIs. These APIs will have a You will be provided with a list of APIs. These APIs will have a
description and a list of parameters and return types for each tool. Your description and a list of parameters and return types for each tool. Your
task involves creating varied, complex, and detailed user scenarios task involves creating varied, complex, and detailed user scenarios
that require to call API calls. You must select what api to call based on that require to call API calls. You must select what api to call based on
the context of the task and the scenario. the context of the task and the scenario.
For instance, given the APIs: SearchHotels, BookHotel, CancelBooking, For instance, given the APIs: SearchHotels, BookHotel, CancelBooking,
@ -116,14 +116,14 @@ def tools_prompt_prep(docs: str = None, scenarios: str = SCENARIOS):
different combination of APIs for each scenario. All APIs must be used in different combination of APIs for each scenario. All APIs must be used in
at least one scenario. You can only use the APIs provided in the APIs at least one scenario. You can only use the APIs provided in the APIs
section. section.
Note that API calls are not explicitly mentioned and their uses are Note that API calls are not explicitly mentioned and their uses are
included in parentheses. This behaviour should be mimicked in your included in parentheses. This behaviour should be mimicked in your
response. response.
Output the tool usage in a strict json format with the function name and input to Output the tool usage in a strict json format with the function name and input to
the function. For example, Deliver your response in this format: the function. For example, Deliver your response in this format:
{scenarios} {scenarios}

@ -83,7 +83,7 @@ class TestResult:
prompt = f""" prompt = f"""
This function has been executed for {self.visit_times} times. Last execution: This function has been executed for {self.visit_times} times. Last execution:
1.Status: {self.runtime_status.name} 1.Status: {self.runtime_status.name}
2.Input: 2.Input:
{self.input_data} {self.input_data}
3.Output: 3.Output:
@ -108,7 +108,7 @@ class Action:
def to_json(self): def to_json(self):
try: try:
tool_output = json.loads(self.tool_output) tool_output = json.loads(self.tool_output)
except: except json.JSONDecodeError:
tool_output = self.tool_output tool_output = self.tool_output
return { return {
"thought": self.thought, "thought": self.thought,

@ -18,7 +18,8 @@ def load_model_torch(
model_path (str): Path to the saved model file. model_path (str): Path to the saved model file.
device (torch.device): Device to move the model to. device (torch.device): Device to move the model to.
model (nn.Module): The model architecture, if the model file only contains the state dictionary. model (nn.Module): The model architecture, if the model file only contains the state dictionary.
strict (bool): Whether to strictly enforce that the keys in the state dictionary match the keys returned by the model's `state_dict()` function. strict (bool): Whether to strictly enforce that the keys in the state dictionary match the keys returned by the model's
`state_dict()` function.
map_location (callable): A function to remap the storage locations of the loaded model. map_location (callable): A function to remap the storage locations of the loaded model.
*args: Additional arguments to pass to `torch.load`. *args: Additional arguments to pass to `torch.load`.
**kwargs: Additional keyword arguments to pass to `torch.load`. **kwargs: Additional keyword arguments to pass to `torch.load`.

Loading…
Cancel
Save