From e8519662956dfef1f7b328a56db66110fc78195d Mon Sep 17 00:00:00 2001 From: Wyatt Stanke Date: Mon, 26 Feb 2024 17:20:01 -0500 Subject: [PATCH] Yeah we formatted it --- .flake8 | 11 +++++ example.py | 2 +- ruff.toml | 10 +++++ swarms/artifacts/text_artifact.py | 9 +++-- swarms/memory/dict_internal_memory.py | 6 ++- swarms/memory/dict_shared_memory.py | 6 ++- swarms/memory/lanchain_chroma.py | 3 +- swarms/memory/pinecone.py | 26 ++++++++++-- swarms/models/__init__.py | 4 +- swarms/models/base_multimodal_model.py | 34 +++++++++++----- swarms/models/biogpt.py | 4 +- swarms/models/cog_vlm.py | 3 +- swarms/models/dalle3.py | 6 +-- swarms/models/eleven_labs.py | 3 +- swarms/models/gemini.py | 8 ++-- swarms/models/gpt4_sam.py | 3 +- swarms/models/gpt4_vision_api.py | 5 ++- swarms/models/idefics.py | 3 +- swarms/models/medical_sam.py | 3 +- swarms/models/openai_models.py | 56 +++++++++++++------------- swarms/models/sam.py | 3 +- swarms/models/speecht5.py | 25 ++++++++---- swarms/models/ssd_1b.py | 6 +-- swarms/prompts/tools.py | 2 +- swarms/prompts/worker_prompt.py | 2 +- swarms/structs/agent.py | 6 +-- swarms/structs/async_workflow.py | 3 +- swarms/structs/base_swarm.py | 4 -- swarms/structs/base_workflow.py | 11 ++++- swarms/structs/concurrent_workflow.py | 3 +- swarms/structs/debate.py | 15 +++++-- swarms/structs/graph_workflow.py | 3 +- swarms/structs/long_swarm.py | 20 ++++----- swarms/structs/majority_voting.py | 3 +- swarms/structs/message_pool.py | 14 +++++-- swarms/tools/tool_func_doc_scraper.py | 3 +- swarms/tools/tool_utils.py | 10 ++--- swarms/utils/apa.py | 4 +- swarms/utils/load_model_torch.py | 3 +- 39 files changed, 225 insertions(+), 120 deletions(-) create mode 100644 .flake8 create mode 100644 ruff.toml diff --git a/.flake8 b/.flake8 new file mode 100644 index 00000000..97c0db09 --- /dev/null +++ b/.flake8 @@ -0,0 +1,11 @@ +[flake8] +max-line-length = 127 +extend-ignore = E203 +per-file-ignores = + # Most of this is just long strings + ./swarms/prompts/**.py: E501 W293 W291 + ./swarms/__init__.py: F401 +exclude = + ./playground + ./tests + ./scripts \ No newline at end of file diff --git a/example.py b/example.py index bebdb11a..e4f2e799 100644 --- a/example.py +++ b/example.py @@ -1,6 +1,6 @@ from swarms import Agent, OpenAIChat -## Initialize the workflow +# Initialize the workflow agent = Agent( llm=OpenAIChat(), max_loops=1, diff --git a/ruff.toml b/ruff.toml new file mode 100644 index 00000000..12322827 --- /dev/null +++ b/ruff.toml @@ -0,0 +1,10 @@ +exclude = ["./playground", "./tests", "./scripts"] +line-length = 127 + +[lint] +ignore = ["E203"] +select = ["E", "F", "W"] + +[lint.per-file-ignores] +"./swarms/prompts/**.py" = ["E501", "W291", "W293"] +"./swarms/__init__.py" = ["F401"] diff --git a/swarms/artifacts/text_artifact.py b/swarms/artifacts/text_artifact.py index 5fdfe4fa..844765ad 100644 --- a/swarms/artifacts/text_artifact.py +++ b/swarms/artifacts/text_artifact.py @@ -23,9 +23,12 @@ class TextArtifact(BaseArtifact): Methods: __add__(self, other: BaseArtifact) -> TextArtifact: Concatenates the text value of the artifact with another artifact. __bool__(self) -> bool: Checks if the text value of the artifact is non-empty. - generate_embedding(self, driver: BaseEmbeddingModel) -> Optional[list[float]]: Generates the embedding of the text artifact using a given embedding model. - token_count(self, tokenizer: BaseTokenizer) -> int: Counts the number of tokens in the text artifact using a given tokenizer. 
-    to_bytes(self) -> bytes: Converts the text value of the artifact to bytes using the specified encoding and error handler.
+    generate_embedding(self, driver: BaseEmbeddingModel) -> Optional[list[float]]:
+        Generates the embedding of the text artifact using a given embedding model.
+    token_count(self, tokenizer: BaseTokenizer) -> int:
+        Counts the number of tokens in the text artifact using a given tokenizer.
+    to_bytes(self) -> bytes:
+        Converts the text value of the artifact to bytes using the specified encoding and error handler.
     """
 
     value: str
diff --git a/swarms/memory/dict_internal_memory.py b/swarms/memory/dict_internal_memory.py
index daba0b0d..6829d464 100644
--- a/swarms/memory/dict_internal_memory.py
+++ b/swarms/memory/dict_internal_memory.py
@@ -7,7 +7,8 @@ class InternalMemoryBase(ABC):
     """Abstract base class for internal memory of agents in the swarm."""
 
     def __init__(self, n_entries):
-        """Initialize the internal memory. In the current architecture the memory always consists of a set of soltuions or evaluations. 
+        """Initialize the internal memory.
+        In the current architecture the memory always consists of a set of solutions or evaluations.
         During the operation, the agent should retrivie best solutions from it's internal memory based on the score.
 
         Moreover, the project is designed around LLMs for the proof of concepts, so we treat all entry content as a string.
@@ -28,7 +29,8 @@ class InternalMemoryBase(ABC):
 class DictInternalMemory(InternalMemoryBase):
     def __init__(self, n_entries: int):
         """
-        Initialize the internal memory. In the current architecture the memory always consists of a set of solutions or evaluations. 
+        Initialize the internal memory.
+        In the current architecture the memory always consists of a set of solutions or evaluations.
         Simple key-value store for now.
 
         Args:
diff --git a/swarms/memory/dict_shared_memory.py b/swarms/memory/dict_shared_memory.py
index f81e2fd4..f00158af 100644
--- a/swarms/memory/dict_shared_memory.py
+++ b/swarms/memory/dict_shared_memory.py
@@ -16,13 +16,15 @@ class DictSharedMemory:
 
     Methods:
         __init__(self, file_loc: str = None) -> None: Initializes the shared memory.
-        add_entry(self, score: float, agent_id: str, agent_cycle: int, entry: Any) -> bool: Adds an entry to the internal memory. 
+        add_entry(self, score: float, agent_id: str, agent_cycle: int, entry: Any) -> bool:
+            Adds an entry to the internal memory.
         get_top_n(self, n: int) -> None: Gets the top n entries from the internal memory.
         write_to_file(self, data: Dict[str, Dict[str, Any]]) -> bool: Writes the internal memory to a file.
     """
 
     def __init__(self, file_loc: str = None) -> None:
-        """Initialize the shared memory. In the current architecture the memory always consists of a set of soltuions or evaluations. 
+        """Initialize the shared memory.
+        In the current architecture the memory always consists of a set of solutions or evaluations.
         Moreover, the project is designed around LLMs for the proof of concepts, so we treat all entry content as a string.
         """
         if file_loc is not None:
diff --git a/swarms/memory/lanchain_chroma.py b/swarms/memory/lanchain_chroma.py
index 95a2e9e3..722a23b6 100644
--- a/swarms/memory/lanchain_chroma.py
+++ b/swarms/memory/lanchain_chroma.py
@@ -153,7 +153,8 @@ class LangchainChromaVectorMemory:
             query (str): The query to search for.
             k (int): The number of results to return.
             type (str): The type of search to perform: "cos" or "mmr".
-            distance_threshold (float): The similarity threshold to use for the search. Results with distance > similarity_threshold will be dropped.
+            distance_threshold (float):
+                The similarity threshold to use for the search. Results with distance > similarity_threshold will be dropped.
 
         Returns:
             list[str]: A list of the top k results.
diff --git a/swarms/memory/pinecone.py b/swarms/memory/pinecone.py
index d33cb9cd..e98dc55a 100644
--- a/swarms/memory/pinecone.py
+++ b/swarms/memory/pinecone.py
@@ -24,13 +24,31 @@ class PineconeDB(AbstractVectorDatabase):
         index (pinecone.Index, optional): The Pinecone index to use. Defaults to None.
 
     Methods:
-        upsert_vector(vector: list[float], vector_id: Optional[str] = None, namespace: Optional[str] = None, meta: Optional[dict] = None, **kwargs) -> str:
+        upsert_vector(
+            vector: list[float],
+            vector_id: Optional[str] = None,
+            namespace: Optional[str] = None,
+            meta: Optional[dict] = None,
+            **kwargs
+        ) -> str:
             Upserts a vector into the index.
-        load_entry(vector_id: str, namespace: Optional[str] = None) -> Optional[BaseVectorStore.Entry]:
+        load_entry(
+            vector_id: str,
+            namespace: Optional[str] = None
+        ) -> Optional[BaseVectorStore.Entry]:
             Loads a single vector from the index.
-        load_entries(namespace: Optional[str] = None) -> list[BaseVectorStore.Entry]:
+        load_entries(
+            namespace: Optional[str] = None
+        ) -> list[BaseVectorStore.Entry]:
             Loads all vectors from the index.
-        query(query: str, count: Optional[int] = None, namespace: Optional[str] = None, include_vectors: bool = False, include_metadata=True, **kwargs) -> list[BaseVectorStore.QueryResult]:
+        query(
+            query: str,
+            count: Optional[int] = None,
+            namespace: Optional[str] = None,
+            include_vectors: bool = False,
+            include_metadata=True,
+            **kwargs
+        ) -> list[BaseVectorStore.QueryResult]:
             Queries the index for vectors similar to the given query string.
         create_index(name: str, **kwargs) -> None:
             Creates a new index.
diff --git a/swarms/models/__init__.py b/swarms/models/__init__.py
index 8981f70e..26f10562 100644
--- a/swarms/models/__init__.py
+++ b/swarms/models/__init__.py
@@ -14,7 +14,7 @@ from swarms.models.clipq import CLIPQ  # noqa: E402
 # from swarms.models.whisperx_model import WhisperX  # noqa: E402
 # from swarms.models.kosmos_two import Kosmos  # noqa: E402
 # from swarms.models.cog_agent import CogAgent  # noqa: E402
-## Function calling models
+# Function calling models
 from swarms.models.fire_function import (
     FireFunctionCaller,
 )
@@ -59,7 +59,7 @@ from swarms.models.timm import TimmModel  # noqa: E402
 # )  # noqa: E402
 from swarms.models.together import TogetherLLM  # noqa: E402
 
-############## Types
+# Types
 from swarms.models.types import (  # noqa: E402
     AudioModality,
     ImageModality,
diff --git a/swarms/models/base_multimodal_model.py b/swarms/models/base_multimodal_model.py
index 25975eaa..77e8bac7 100644
--- a/swarms/models/base_multimodal_model.py
+++ b/swarms/models/base_multimodal_model.py
@@ -40,14 +40,30 @@ class BaseMultiModalModel:
     Examples:
         >>> from swarms.models.base_multimodal_model import BaseMultiModalModel
         >>> model = BaseMultiModalModel()
+        >>> link = "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png"
         >>> model.run("Generate a summary of this text")
-        >>> model.run("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png")
+        >>> model.run("Generate a summary of this text", link)
         >>> model.run_batch(["Generate a summary of this text", "Generate a summary of this text"])
-        >>> model.run_batch([("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png"), ("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png")])
-        >>> model.run_batch_async(["Generate a summary of this text", "Generate a summary of this text"])
-        >>> model.run_batch_async([("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png"), ("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png")])
-        >>> model.run_batch_async_with_retries(["Generate a summary of this text", "Generate a summary of this text"])
-        >>> model.run_batch_async_with_retries([("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png"), ("Generate a summary of this text", "https://www.google.com/images/branding/googlelogo/2x/googlelogo_color_272x92dp.png")])
+        >>> model.run_batch([
+            ("Generate a summary of this text", link),
+            ("Generate a summary of this text", link)
+        ])
+        >>> model.run_batch_async([
+            "Generate a summary of this text",
+            "Generate a summary of this text"
+        ])
+        >>> model.run_batch_async([
+            ("Generate a summary of this text", link),
+            ("Generate a summary of this text", link)
+        ])
+        >>> model.run_batch_async_with_retries([
+            "Generate a summary of this text",
+            "Generate a summary of this text"
+        ])
+        >>> model.run_batch_async_with_retries([
+            ("Generate a summary of this text", link),
+            ("Generate a summary of this text", link)
+        ])
         >>> model.generate_summary("Generate a summary of this text")
         >>> model.set_temperature(0.5)
         >>> model.set_max_tokens(500)
@@ -348,9 +364,9 @@ class BaseMultiModalModel:
             _type_: _description_
         """
         META_PROMPT = """
-            For any labels or markings on an image that you reference in your response, please 
-            enclose them in square brackets ([]) and list them explicitly. Do not use ranges; for 
-            example, instead of '1 - 4', list as '[1], [2], [3], [4]'. These labels could be 
+            For any labels or markings on an image that you reference in your response, please
+            enclose them in square brackets ([]) and list them explicitly. Do not use ranges; for
+            example, instead of '1 - 4', list as '[1], [2], [3], [4]'. These labels could be
             numbers or letters and typically correspond to specific segments or parts of the image.
             """
         return META_PROMPT
diff --git a/swarms/models/biogpt.py b/swarms/models/biogpt.py
index a5ec7b7b..0d122667 100644
--- a/swarms/models/biogpt.py
+++ b/swarms/models/biogpt.py
@@ -2,7 +2,8 @@ r"""
 BioGPT
 Pre-trained language models have attracted increasing attention in the biomedical domain,
 inspired by their great success in the general natural language domain.
-Among the two main branches of pre-trained language models in the general language domain, i.e. BERT (and its variants) and GPT (and its variants),
+Among the two main branches of pre-trained language models in the general language domain,
+i.e. BERT (and its variants) and GPT (and its variants),
 the first one has been extensively studied in the biomedical domain, such as BioBERT and PubMedBERT.
 While they have achieved great success on a variety of discriminative downstream biomedical tasks,
 the lack of generation ability constrains their application scope.
@@ -24,7 +25,6 @@ advantage of BioGPT on biomedical literature to generate fluent descriptions fo
     number = {6},
     year = {2022},
     month = {09},
-    abstract = "{Pre-trained language models have attracted increasing attention in the biomedical domain, inspired by their great success in the general natural language domain. Among the two main branches of pre-trained language models in the general language domain, i.e. BERT (and its variants) and GPT (and its variants), the first one has been extensively studied in the biomedical domain, such as BioBERT and PubMedBERT. While they have achieved great success on a variety of discriminative downstream biomedical tasks, the lack of generation ability constrains their application scope. In this paper, we propose BioGPT, a domain-specific generative Transformer language model pre-trained on large-scale biomedical literature. We evaluate BioGPT on six biomedical natural language processing tasks and demonstrate that our model outperforms previous models on most tasks. Especially, we get 44.98\%, 38.42\% and 40.76\% F1 score on BC5CDR, KD-DTI and DDI end-to-end relation extraction tasks, respectively, and 78.2\% accuracy on PubMedQA, creating a new record. Our case study on text generation further demonstrates the advantage of BioGPT on biomedical literature to generate fluent descriptions for biomedical terms.}",
     issn = {1477-4054},
     doi = {10.1093/bib/bbac409},
     url = {https://doi.org/10.1093/bib/bbac409},
diff --git a/swarms/models/cog_vlm.py b/swarms/models/cog_vlm.py
index e456b669..b016f4b5 100644
--- a/swarms/models/cog_vlm.py
+++ b/swarms/models/cog_vlm.py
@@ -161,7 +161,8 @@ class CogVLMMultiModal(BaseMultiModalModel):
     Methods:
         run: Generates a response using the CogVLM model.
         generate_stream_cogvlm: Generates a stream of responses using the CogVLM model in inference mode.
-        process_history_and_images: Processes history messages to extract text, identify the last user query, and convert base64 encoded image URLs to PIL images.
+        process_history_and_images: Processes history messages to extract text, identify the last user query,
+            and convert base64 encoded image URLs to PIL images.
 
     Example:
         >>> model = CogVLMMultiModal()
diff --git a/swarms/models/dalle3.py b/swarms/models/dalle3.py
index 6b225b49..381a58f6 100644
--- a/swarms/models/dalle3.py
+++ b/swarms/models/dalle3.py
@@ -257,7 +257,7 @@ class Dalle3:
         """Print the Dalle3 dashboard"""
         print(
             colored(
-                f"""Dalle3 Dashboard: 
+                f"""Dalle3 Dashboard:
                     --------------------
 
                     Model: {self.model}
@@ -271,8 +271,8 @@ class Dalle3:
                     Save Folder: {self.save_folder}
                     Image Format: {self.image_format}
                     --------------------
-                    
-                    
+
+
                     """,
                     "green",
                 )
diff --git a/swarms/models/eleven_labs.py b/swarms/models/eleven_labs.py
index 2d55e864..f82f43ca 100644
--- a/swarms/models/eleven_labs.py
+++ b/swarms/models/eleven_labs.py
@@ -37,7 +37,8 @@ class ElevenLabsText2SpeechTool(BaseTool):
         Defaults to ElevenLabsModel.MULTI_LINGUAL.
         name (str): The name of the tool. Defaults to "eleven_labs_text2speech".
         description (str): The description of the tool.
-        Defaults to "A wrapper around Eleven Labs Text2Speech. Useful for when you need to convert text to speech. It supports multiple languages, including English, German, Polish, Spanish, Italian, French, Portuguese, and Hindi."
+        Defaults to "A wrapper around Eleven Labs Text2Speech. Useful for when you need to convert text to speech.
+        It supports multiple languages, including English, German, Polish, Spanish, Italian, French, Portuguese, and Hindi."
 
 
     Usage:
diff --git a/swarms/models/gemini.py b/swarms/models/gemini.py
index 249e9c53..3aa6222d 100644
--- a/swarms/models/gemini.py
+++ b/swarms/models/gemini.py
@@ -132,13 +132,13 @@ class Gemini(BaseMultiModalModel):
             system_prompt (str, optional): _description_. Defaults to None.
         """
         PROMPT = f"""
-        
+
         {self.system_prompt}
-        
+
         ######
-        
+
         {task}
-        
+
         """
         return PROMPT
diff --git a/swarms/models/gpt4_sam.py b/swarms/models/gpt4_sam.py
index 37dde6a0..b6c98771 100644
--- a/swarms/models/gpt4_sam.py
+++ b/swarms/models/gpt4_sam.py
@@ -11,7 +11,8 @@ from swarms.utils.supervision_visualizer import MarkVisualizer
 class GPT4VSAM(BaseMultiModalModel):
     """
     GPT4VSAM class represents a multi-modal model that combines the capabilities of GPT-4 and SegmentAnythingMarkGenerator.
-    It takes an instance of BaseMultiModalModel (vlm) and a device as input and provides methods for loading images and making predictions.
+    It takes an instance of BaseMultiModalModel (vlm)
+    and a device as input and provides methods for loading images and making predictions.
 
     Args:
         vlm (BaseMultiModalModel): An instance of BaseMultiModalModel representing the visual language model.
diff --git a/swarms/models/gpt4_vision_api.py b/swarms/models/gpt4_vision_api.py
index 5966a0b6..808415b3 100644
--- a/swarms/models/gpt4_vision_api.py
+++ b/swarms/models/gpt4_vision_api.py
@@ -203,8 +203,9 @@ class GPT4VisionAPI(BaseMultiModalModel):
         """
         PROMPT = f"""
-        These are frames from a video that I want to upload. Generate a compelling description that I can upload along with the video:
-        
+        These are frames from a video that I want to upload.
+        Generate a compelling description that I can upload along with the video:
+
         {frames}
         """
         return PROMPT
diff --git a/swarms/models/idefics.py b/swarms/models/idefics.py
index cc654221..4054bae9 100644
--- a/swarms/models/idefics.py
+++ b/swarms/models/idefics.py
@@ -63,7 +63,8 @@ class Idefics(BaseMultiModalModel):
         response = model.chat(user_input)
         print(response)
 
-        user_input = "User: And who is that? https://static.wikia.nocookie.net/asterix/images/2/25/R22b.gif/revision/latest?cb=20110815073052"
+        user_input = "User: And who is that? \
+https://static.wikia.nocookie.net/asterix/images/2/25/R22b.gif/revision/latest?cb=20110815073052"
         response = model.chat(user_input)
         print(response)
diff --git a/swarms/models/medical_sam.py b/swarms/models/medical_sam.py
index 8d096ba5..09203feb 100644
--- a/swarms/models/medical_sam.py
+++ b/swarms/models/medical_sam.py
@@ -26,7 +26,8 @@ class MedicalSAM:
 
     Methods:
         __post_init__(): Initializes the MedicalSAM object.
-        download_model_weights(model_path: str): Downloads the model weights from the specified URL and saves them to the given file path.
+        download_model_weights(model_path: str):
+            Downloads the model weights from the specified URL and saves them to the given file path.
         preprocess(img): Preprocesses the input image.
         run(img, box): Runs the semantic segmentation on the input image within the specified bounding box.
 
diff --git a/swarms/models/openai_models.py b/swarms/models/openai_models.py
index ec4434b6..42cced02 100644
--- a/swarms/models/openai_models.py
+++ b/swarms/models/openai_models.py
@@ -37,9 +37,6 @@ from tenacity import (
     stop_after_attempt,
     wait_exponential,
 )
-
-logger = logging.getLogger(__name__)
-
 from importlib.metadata import version
 from packaging.version import parse
 
@@ -239,9 +236,9 @@ class BaseOpenAI(BaseLLM):
             attributes["openai_api_base"] = self.openai_api_base
 
         if self.openai_organization != "":
-            attributes[
-                "openai_organization"
-            ] = self.openai_organization
+            attributes["openai_organization"] = (
+                self.openai_organization
+            )
 
         if self.openai_proxy != "":
             attributes["openai_proxy"] = self.openai_proxy
@@ -651,9 +648,10 @@ class BaseOpenAI(BaseLLM):
             "organization": self.openai_organization,
         }
         if self.openai_proxy:
-            import openai
+            pass
 
-            # TODO: The 'openai.proxy' option isn't read in the client API. You will need to pass it when you instantiate the client, e.g. 'OpenAI(proxy={"http": self.openai_proxy, "https": self.openai_proxy})'
+            # TODO: The 'openai.proxy' option isn't read in the client API. You will need to pass it when you instantiate the
+            # client, e.g. 'OpenAI(proxy={"http": self.openai_proxy, "https": self.openai_proxy})'
             # openai.proxy = {"http": self.openai_proxy, "https": self.openai_proxy}  # type: ignore[assignment]  # noqa: E501
         return {**openai_creds, **self._default_params}
@@ -940,27 +938,27 @@ class OpenAIChat(BaseLLM):
     @root_validator()
     def validate_environment(cls, values: dict) -> dict:
         """Validate that api key and python package exists in environment."""
-        openai_api_key = get_from_dict_or_env(
-            values, "openai_api_key", "OPENAI_API_KEY"
-        )
-        openai_api_base = get_from_dict_or_env(
-            values,
-            "openai_api_base",
-            "OPENAI_API_BASE",
-            default="",
-        )
-        openai_proxy = get_from_dict_or_env(
-            values,
-            "openai_proxy",
-            "OPENAI_PROXY",
-            default="",
-        )
-        openai_organization = get_from_dict_or_env(
-            values,
-            "openai_organization",
-            "OPENAI_ORGANIZATION",
-            default="",
-        )
+        # openai_api_key = get_from_dict_or_env(
+        #     values, "openai_api_key", "OPENAI_API_KEY"
+        # )
+        # openai_api_base = get_from_dict_or_env(
+        #     values,
+        #     "openai_api_base",
+        #     "OPENAI_API_BASE",
+        #     default="",
+        # )
+        # openai_proxy = get_from_dict_or_env(
+        #     values,
+        #     "openai_proxy",
+        #     "OPENAI_PROXY",
+        #     default="",
+        # )
+        # openai_organization = get_from_dict_or_env(
+        #     values,
+        #     "openai_organization",
+        #     "OPENAI_ORGANIZATION",
+        #     default="",
+        # )
         try:
             import openai
         except ImportError:
diff --git a/swarms/models/sam.py b/swarms/models/sam.py
index c51a2517..05cf77db 100644
--- a/swarms/models/sam.py
+++ b/swarms/models/sam.py
@@ -27,7 +27,8 @@ class SAM:
         processor (SamProcessor): The processor for the SAM model.
 
     Methods:
-        run(task=None, img=None, *args, **kwargs): Runs the SAM model on the given image and returns the segmentation scores and masks.
+        run(task=None, img=None, *args, **kwargs):
+            Runs the SAM model on the given image and returns the segmentation scores and masks.
         process_img(img: str = None, *args, **kwargs): Processes the input image and returns the processed image.
 
     """
diff --git a/swarms/models/speecht5.py b/swarms/models/speecht5.py
index b9f2653b..27cc6059 100644
--- a/swarms/models/speecht5.py
+++ b/swarms/models/speecht5.py
@@ -2,18 +2,29 @@
 SpeechT5 (TTS task)
 SpeechT5 model fine-tuned for speech synthesis (text-to-speech) on LibriTTS.
 
-This model was introduced in SpeechT5: Unified-Modal Encoder-Decoder Pre-Training for Spoken Language Processing by Junyi Ao, Rui Wang, Long Zhou, Chengyi Wang, Shuo Ren, Yu Wu, Shujie Liu, Tom Ko, Qing Li, Yu Zhang, Zhihua Wei, Yao Qian, Jinyu Li, Furu Wei.
+This model was introduced in SpeechT5: Unified-Modal Encoder-Decoder Pre-Training for Spoken Language Processing by
+Junyi Ao, Rui Wang, Long Zhou, Chengyi Wang, Shuo Ren, Yu Wu,
+Shujie Liu, Tom Ko, Qing Li, Yu Zhang, Zhihua Wei, Yao Qian, Jinyu Li, Furu Wei.
 SpeechT5 was first released in this repository, original weights. The license used is MIT.
 Model Description
-Motivated by the success of T5 (Text-To-Text Transfer Transformer) in pre-trained natural language processing models, we propose a unified-modal SpeechT5 framework that explores the encoder-decoder pre-training for self-supervised speech/text representation learning. The SpeechT5 framework consists of a shared encoder-decoder network and six modal-specific (speech/text) pre/post-nets. After preprocessing the input speech/text through the pre-nets, the shared encoder-decoder network models the sequence-to-sequence transformation, and then the post-nets generate the output in the speech/text modality based on the output of the decoder.
+Motivated by the success of T5 (Text-To-Text Transfer Transformer) in pre-trained natural language processing models,
+we propose a unified-modal SpeechT5 framework that explores the encoder-decoder pre-training for self-supervised speech/text
+representation learning. The SpeechT5 framework consists of a shared encoder-decoder network and six modal-specific
+(speech/text) pre/post-nets. After preprocessing the input speech/text through the pre-nets, the shared encoder-decoder network
+models the sequence-to-sequence transformation, and then the post-nets generate the output in the speech/text modality based on
+the output of the decoder.
+
+Leveraging large-scale unlabeled speech and text data, we pre-train SpeechT5 to learn a unified-modal representation,
+hoping to improve the modeling capability for both speech and text. To align the textual and speech information into
+this unified semantic space, we propose a cross-modal vector quantization approach that randomly mixes up speech/text
+states with latent units as the interface between encoder and decoder.
+
+Extensive evaluations show the superiority of the proposed SpeechT5 framework on a wide variety of spoken language processing
+tasks, including automatic speech recognition, speech synthesis, speech translation, voice conversion, speech enhancement,
+and speaker identification.
 
-Leveraging large-scale unlabeled speech and text data, we pre-train SpeechT5 to learn a unified-modal representation, hoping to improve the modeling capability for both speech and text. To align the textual and speech information into this unified semantic space, we propose a cross-modal vector quantization approach that randomly mixes up speech/text states with latent units as the interface between encoder and decoder.
-
-Extensive evaluations show the superiority of the proposed SpeechT5 framework on a wide variety of spoken language processing tasks, including automatic speech recognition, speech synthesis, speech translation, voice conversion, speech enhancement, and speaker identification.
-
-Developed by: Junyi Ao, Rui Wang, Long Zhou, Chengyi Wang, Shuo Ren, Yu Wu, Shujie Liu, Tom Ko, Qing Li, Yu Zhang, Zhihua Wei, Yao Qian, Jinyu Li, Furu Wei.
 Shared by [optional]: Matthijs Hollemans
 Model type: text-to-speech
 Language(s) (NLP): [More Information Needed]
diff --git a/swarms/models/ssd_1b.py b/swarms/models/ssd_1b.py
index 4479c866..1dc6c00a 100644
--- a/swarms/models/ssd_1b.py
+++ b/swarms/models/ssd_1b.py
@@ -171,7 +171,7 @@ class SSD1B:
         """Print the SSD1B dashboard"""
         print(
             colored(
-                f"""SSD1B Dashboard: 
+                f"""SSD1B Dashboard:
                     --------------------
 
                     Model: {self.model}
@@ -185,8 +185,8 @@ class SSD1B:
                     Save Folder: {self.save_folder}
                     Image Format: {self.image_format}
                     --------------------
-                    
-                    
+
+
                     """,
                     "green",
                 )
diff --git a/swarms/prompts/tools.py b/swarms/prompts/tools.py
index fe82ba5d..fe1c8980 100644
--- a/swarms/prompts/tools.py
+++ b/swarms/prompts/tools.py
@@ -34,7 +34,7 @@ commands: {
 """
 
 
-########### FEW SHOT EXAMPLES ################
+# FEW SHOT EXAMPLES #
 SCENARIOS = """
 commands: {
     "tools": {
diff --git a/swarms/prompts/worker_prompt.py b/swarms/prompts/worker_prompt.py
index 08636516..63723d36 100644
--- a/swarms/prompts/worker_prompt.py
+++ b/swarms/prompts/worker_prompt.py
@@ -62,6 +62,6 @@ def worker_tools_sop_promp(name: str, memory: str, time=time):
     [{memory}]
 
     Human: Determine which next command to use, and respond using the format specified above:
-    """.format(name=name, time=time, memory=memory)
+    """.format(name=name, time=time, memory=memory)  # noqa: F521
 
     return str(out)
diff --git a/swarms/structs/agent.py b/swarms/structs/agent.py
index e1282e5b..25df53a3 100644
--- a/swarms/structs/agent.py
+++ b/swarms/structs/agent.py
@@ -447,7 +447,7 @@ class Agent:
                 Name: {self.agent_name}
                 Description: {self.agent_description}
                 Standard Operating Procedure: {self.sop}
-                System Prompt: {self.system_prompt} 
+                System Prompt: {self.system_prompt}
                 Task: {task}
                 Max Loops: {self.max_loops}
                 Stopping Condition: {self.stopping_condition}
@@ -778,7 +778,7 @@ class Agent:
 
             Follow this standard operating procedure (SOP) to complete tasks:
             {self.sop}
-            
+
             {history}
             """
             return agent_history_prompt
@@ -786,7 +786,7 @@ class Agent:
             system_prompt = self.system_prompt
             agent_history_prompt = f"""
             System : {system_prompt}
-            
+
             {history}
             """
             return agent_history_prompt
diff --git a/swarms/structs/async_workflow.py b/swarms/structs/async_workflow.py
index fa53c46b..cec65878 100644
--- a/swarms/structs/async_workflow.py
+++ b/swarms/structs/async_workflow.py
@@ -96,7 +96,8 @@ class AsyncWorkflow:
             #     if self.dashboard:
             #         self.display()
 
-            # Add a stopping condition to stop the workflow, if provided but stopping_condition takes in a parameter s for string
+            # Add a stopping condition to stop the workflow,
+            # if provided but stopping_condition takes in a parameter s for string
             if self.stopping_condition:
                 if self.stopping_condition(self.results):
                     break
diff --git a/swarms/structs/base_swarm.py b/swarms/structs/base_swarm.py
index ed910546..2f6176dc 100644
--- a/swarms/structs/base_swarm.py
+++ b/swarms/structs/base_swarm.py
@@ -86,10 +86,6 @@ class AbstractSwarm(ABC):
     def step(self):
         """Step the swarm"""
 
-    # @abstractmethod
-    def add_agent(self, agent: "Agent"):
-        """Add a agent to the swarm"""
-
     # @abstractmethod
     def remove_agent(self, agent: "Agent"):
         """Remove a agent from the swarm"""
diff --git a/swarms/structs/base_workflow.py b/swarms/structs/base_workflow.py
index ace1fd3d..982207f7 100644
--- a/swarms/structs/base_workflow.py
+++ b/swarms/structs/base_workflow.py
@@ -181,7 +181,16 @@ class BaseWorkflow(BaseStructure):
         >>> workflow.add("Create a report on these metrics", llm)
         >>> workflow.delete_task("What's the weather in miami")
         >>> workflow.tasks
-        [Task(description='Create a report on these metrics', agent=Agent(llm=OpenAIChat(openai_api_key=''), max_loops=1, dashboard=False), args=[], kwargs={}, result=None, history=[])]
+        [
+        Task(
+            description='Create a report on these metrics',
+            agent=Agent(llm=OpenAIChat(openai_api_key=''), max_loops=1, dashboard=False),
+            args=[],
+            kwargs={},
+            result=None,
+            history=[]
+        )
+        ]
         """
         try:
             for task in self.tasks:
diff --git a/swarms/structs/concurrent_workflow.py b/swarms/structs/concurrent_workflow.py
index f36df3b3..2c83895a 100644
--- a/swarms/structs/concurrent_workflow.py
+++ b/swarms/structs/concurrent_workflow.py
@@ -15,7 +15,8 @@ class ConcurrentWorkflow(BaseStructure):
     Args:
         max_workers (int): The maximum number of workers to use for the ThreadPoolExecutor.
         autosave (bool): Whether to save the state of the workflow to a file. Default is False.
-        saved_state_filepath (str): The filepath to save the state of the workflow to. Default is "runs/concurrent_workflow.json".
+        saved_state_filepath (str):
+            The filepath to save the state of the workflow to. Default is "runs/concurrent_workflow.json".
         print_results (bool): Whether to print the results of each task. Default is False.
         return_results (bool): Whether to return the results of each task. Default is False.
         use_processes (bool): Whether to use processes instead of threads. Default is False.
diff --git a/swarms/structs/debate.py b/swarms/structs/debate.py
index 95c889d3..c1510ef0 100644
--- a/swarms/structs/debate.py
+++ b/swarms/structs/debate.py
@@ -19,7 +19,8 @@ class DebatePlayer(Agent):
     Args:
         model_name(str): model name
         name (str): name of this player
-        temperature (float): higher values make the output more random, while lower values make it more focused and deterministic
+        temperature (float):
+            higher values make the output more random, while lower values make it more focused and deterministic
         openai_api_key (str): As the parameter name suggests
         sleep_time (float): sleep because of rate limits
     """
@@ -31,7 +32,8 @@ class Debate:
 
     Args:
         model_name (str): openai model name
-        temperature (float): higher values make the output more random, while lower values make it more focused and deterministic
+        temperature (float):
+            higher values make the output more random, while lower values make it more focused and deterministic
         num_players (int): num of players
         save_file_dir (str): dir path to json file
         openai_api_key (str): As the parameter name suggests
@@ -359,6 +361,13 @@
 # with open(prompts_path, 'w') as file:
 #     json.dump(config, file, ensure_ascii=False, indent=4)
 
-# debate = Debate(save_file_dir=save_file_dir, num_players=3, openai_api_key=openai_api_key, prompts_path=prompts_path, temperature=0, sleep_time=0)
+# debate = Debate(
+#     save_file_dir=save_file_dir,
+#     num_players=3,
+#     openai_api_key=openai_api_key,
+#     prompts_path=prompts_path,
+#     temperature=0,
+#     sleep_time=0
+# )
 # debate.run()
 # debate.save_file_to_json(id)
diff --git a/swarms/structs/graph_workflow.py b/swarms/structs/graph_workflow.py
index 23d90339..d06a5477 100644
--- a/swarms/structs/graph_workflow.py
+++ b/swarms/structs/graph_workflow.py
@@ -17,7 +17,8 @@ class GraphWorkflow(BaseStructure):
     connect(from_node, to_node): Connects two nodes in the graph.
     set_entry_point(node_name): Sets the entry point node for the workflow.
     add_edge(from_node, to_node): Adds an edge between two nodes in the graph.
-    add_conditional_edges(from_node, condition, edge_dict): Adds conditional edges from a node to multiple nodes based on a condition.
+ add_conditional_edges(from_node, condition, edge_dict): + Adds conditional edges from a node to multiple nodes based on a condition. run(): Runs the workflow and returns the graph. Examples: diff --git a/swarms/structs/long_swarm.py b/swarms/structs/long_swarm.py index e24a3e08..7df54726 100644 --- a/swarms/structs/long_swarm.py +++ b/swarms/structs/long_swarm.py @@ -51,23 +51,23 @@ class LongContextSwarmLeader: - prompt (str): The formatted string containing the agent metadata. """ prompt = f""" - + You need to recruit a team of members to solve a task. Select the appropriate member based on the task description: - + # Task Description {task} - + # Members - + Your output must follow this JSON schema below in markdown format: {{ "agent_id": "string", "agent_name": "string", "agent_description": "string" }} - + """ for agent in self.agents: prompt += ( @@ -83,7 +83,7 @@ class LongContextSwarmLeader: You are the leader of a team of {len(self.agents)} members. Your team will need to collaborate to solve a task. The rule is: - + 1. Only you know the task description and task objective; the other members do not. 2. But they will receive different documents that @@ -95,13 +95,13 @@ class LongContextSwarmLeader: explicitly include the task objective. 4. Finally, you need to complete the task based on the query results they return. - + # Task Description: {task_description} - + # Task Objective: {task} - + # Generate Instruction for Members: Now, you need to generate an instruction for all team members. You can ask them to answer a @@ -110,7 +110,7 @@ class LongContextSwarmLeader: Your output must following the JSON format: {{"type": "instruction", "content": "your_instruction_content"}} - + """ return prompt diff --git a/swarms/structs/majority_voting.py b/swarms/structs/majority_voting.py index fc4f8018..573684b0 100644 --- a/swarms/structs/majority_voting.py +++ b/swarms/structs/majority_voting.py @@ -129,7 +129,8 @@ class MajorityVoting: multithreaded (bool, optional): Whether to run the agents using multithreading. Defaults to False. multiprocess (bool, optional): Whether to run the agents using multiprocessing. Defaults to False. asynchronous (bool, optional): Whether to run the agents asynchronously. Defaults to False. - output_parser (callable, optional): A callable function to parse the output of the majority voting system. Defaults to None. + output_parser (callable, optional): A callable function to parse the output + of the majority voting system. Defaults to None. 
     Examples:
         >>> from swarms.structs.agent import Agent
diff --git a/swarms/structs/message_pool.py b/swarms/structs/message_pool.py
index 37dbb19e..552b4a9b 100644
--- a/swarms/structs/message_pool.py
+++ b/swarms/structs/message_pool.py
@@ -68,11 +68,17 @@ class MessagePool(BaseSwarm):
     >>> message_pool.add(agent=agent2, content="Hello, agent1!", turn=1)
     >>> message_pool.add(agent=agent3, content="Hello, agent1!", turn=1)
     >>> message_pool.get_all_messages()
-    [{'agent': Agent(agent_name='agent1'), 'content': 'Hello, agent2!', 'turn': 1, 'visible_to': 'all', 'logged': True}, {'agent': Agent(agent_name='agent2'), 'content': 'Hello, agent1!', 'turn': 1, 'visible_to': 'all', 'logged': True}, {'agent': Agent(agent_name='agent3'), 'content': 'Hello, agent1!', 'turn': 1, 'visible_to': 'all', 'logged': True}]
-    >>> message_pool.get_visible_messages(agent=agent1, turn=1)
-    [{'agent': Agent(agent_name='agent1'), 'content': 'Hello, agent2!', 'turn': 1, 'visible_to': 'all', 'logged': True}, {'agent': Agent(agent_name='agent2'), 'content': 'Hello, agent1!', 'turn': 1, 'visible_to': 'all', 'logged': True}, {'agent': Agent(agent_name='agent3'), 'content': 'Hello, agent1!', 'turn': 1, 'visible_to': 'all', 'logged': True}]
+    [{'agent': Agent(agent_name='agent1'), 'content': 'Hello, agent2!', 'turn': 1, 'visible_to': 'all', 'logged': True},
+    {'agent': Agent(agent_name='agent2'), 'content': 'Hello, agent1!', 'turn': 1, 'visible_to': 'all', 'logged': True},
+    {'agent': Agent(agent_name='agent3'), 'content': 'Hello, agent1!', 'turn': 1, 'visible_to': 'all', 'logged': True}]
     >>> message_pool.get_visible_messages(agent=agent2, turn=1)
-    [{'agent': Agent(agent_name='agent1'), 'content': 'Hello, agent2!', 'turn': 1, 'visible_to': 'all', 'logged': True}, {'agent': Agent(agent_name='agent2'), 'content': 'Hello, agent1!', 'turn': 1, 'visible_to': 'all', 'logged': True}, {'agent': Agent(agent_name='agent3'), 'content': 'Hello, agent1!', 'turn': 1, 'visible_to': 'all', 'logged': True}]
+    [{'agent': Agent(agent_name='agent1'), 'content': 'Hello, agent2!', 'turn': 1, 'visible_to': 'all', 'logged': True},
+    {'agent': Agent(agent_name='agent2'), 'content': 'Hello, agent1!', 'turn': 1, 'visible_to': 'all', 'logged': True},
+    {'agent': Agent(agent_name='agent3'), 'content': 'Hello, agent1!', 'turn': 1, 'visible_to': 'all', 'logged': True}]
+    >>> message_pool.get_visible_messages(agent=agent1, turn=1)
+    [{'agent': Agent(agent_name='agent1'), 'content': 'Hello, agent2!', 'turn': 1, 'visible_to': 'all', 'logged': True},
+    {'agent': Agent(agent_name='agent2'), 'content': 'Hello, agent1!', 'turn': 1, 'visible_to': 'all', 'logged': True},
+    {'agent': Agent(agent_name='agent3'), 'content': 'Hello, agent1!', 'turn': 1, 'visible_to': 'all', 'logged': True}]
     """
 
     def __init__(
diff --git a/swarms/tools/tool_func_doc_scraper.py b/swarms/tools/tool_func_doc_scraper.py
index fccfc6a1..60e6de6f 100644
--- a/swarms/tools/tool_func_doc_scraper.py
+++ b/swarms/tools/tool_func_doc_scraper.py
@@ -12,7 +12,8 @@ def scrape_tool_func_docs(fn: Callable) -> str:
         fn (Callable): The function to scrape.
 
     Returns:
-        str: A string containing the function's name, documentation string, and a list of its parameters. Each parameter is represented as a line containing the parameter's name, default value, and annotation.
+        str: A string containing the function's name, documentation string, and a list of its parameters.
+        Each parameter is represented as a line containing the parameter's name, default value, and annotation.
""" try: # If the function is a tool, get the original function diff --git a/swarms/tools/tool_utils.py b/swarms/tools/tool_utils.py index ee6b6391..3f51187d 100644 --- a/swarms/tools/tool_utils.py +++ b/swarms/tools/tool_utils.py @@ -85,7 +85,7 @@ def tools_prompt_prep(docs: str = None, scenarios: str = SCENARIOS): You will be provided with a list of APIs. These APIs will have a description and a list of parameters and return types for each tool. Your task involves creating varied, complex, and detailed user scenarios - that require to call API calls. You must select what api to call based on + that require to call API calls. You must select what api to call based on the context of the task and the scenario. For instance, given the APIs: SearchHotels, BookHotel, CancelBooking, @@ -116,14 +116,14 @@ def tools_prompt_prep(docs: str = None, scenarios: str = SCENARIOS): different combination of APIs for each scenario. All APIs must be used in at least one scenario. You can only use the APIs provided in the APIs section. - + Note that API calls are not explicitly mentioned and their uses are included in parentheses. This behaviour should be mimicked in your response. - - Output the tool usage in a strict json format with the function name and input to + + Output the tool usage in a strict json format with the function name and input to the function. For example, Deliver your response in this format: - + ‘‘‘ {scenarios} ‘‘‘ diff --git a/swarms/utils/apa.py b/swarms/utils/apa.py index 05b25c5c..bbb7271c 100644 --- a/swarms/utils/apa.py +++ b/swarms/utils/apa.py @@ -83,7 +83,7 @@ class TestResult: prompt = f""" This function has been executed for {self.visit_times} times. Last execution: 1.Status: {self.runtime_status.name} -2.Input: +2.Input: {self.input_data} 3.Output: @@ -108,7 +108,7 @@ class Action: def to_json(self): try: tool_output = json.loads(self.tool_output) - except: + except json.JSONDecodeError: tool_output = self.tool_output return { "thought": self.thought, diff --git a/swarms/utils/load_model_torch.py b/swarms/utils/load_model_torch.py index 53649e93..bda898ac 100644 --- a/swarms/utils/load_model_torch.py +++ b/swarms/utils/load_model_torch.py @@ -18,7 +18,8 @@ def load_model_torch( model_path (str): Path to the saved model file. device (torch.device): Device to move the model to. model (nn.Module): The model architecture, if the model file only contains the state dictionary. - strict (bool): Whether to strictly enforce that the keys in the state dictionary match the keys returned by the model's `state_dict()` function. + strict (bool): Whether to strictly enforce that the keys in the state dictionary match the keys returned by the model's + `state_dict()` function. map_location (callable): A function to remap the storage locations of the loaded model. *args: Additional arguments to pass to `torch.load`. **kwargs: Additional keyword arguments to pass to `torch.load`.