[OpenAITTS][AbstractWorker FIX] [CHORES]

pull/289/head
Kye 1 year ago
parent 58c0ee1986
commit e719e83912

@@ -64,23 +64,43 @@ accessories_stylist_agent = Agent(
 # Run agents with respective tasks
 haircut_suggestions = haircut_stylist_agent.run(
-    "Suggest suitable haircuts for this user, considering their face shape and hair type.", user_selfie
+    (
+        "Suggest suitable haircuts for this user, considering their"
+        " face shape and hair type."
+    ),
+    user_selfie,
 )
 
 # Run Makeup or Beard agent based on gender
 if user_gender == "woman":
     makeup_suggestions = makeup_or_beard_stylist_agent.run(
-        "Recommend makeup styles for this user, complementing their features.", user_selfie
+        (
+            "Recommend makeup styles for this user, complementing"
+            " their features."
+        ),
+        user_selfie,
     )
 elif user_gender == "man":
     beard_suggestions = makeup_or_beard_stylist_agent.run(
-        "Provide beard styling advice for this user, considering their face shape.", user_selfie
+        (
+            "Provide beard styling advice for this user, considering"
+            " their face shape."
+        ),
+        user_selfie,
     )
 
 clothing_suggestions = clothing_stylist_agent.run(
-    "Match clothing styles and colors for this user, using color matching principles.", clothes_image
+    (
+        "Match clothing styles and colors for this user, using color"
+        " matching principles."
+    ),
+    clothes_image,
 )
 
 accessories_suggestions = accessories_stylist_agent.run(
-    "Suggest accessories to complement this user's outfit, considering the overall style.", clothes_image
+    (
+        "Suggest accessories to complement this user's outfit,"
+        " considering the overall style."
+    ),
+    clothes_image,
 )

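The prompt strings above are only re-wrapped: Python's implicit string concatenation makes the new parenthesized form identical to the old single-line literal, so each agent still receives exactly the same task text. A quick sanity check:

    prompt = (
        "Suggest suitable haircuts for this user, considering their"
        " face shape and hair type."
    )
    assert prompt == (
        "Suggest suitable haircuts for this user, considering their face shape and hair type."
    )
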
@@ -8,7 +8,7 @@ from swarms.models.openai_models import (
     AzureOpenAI,
     OpenAIChat,
 )  # noqa: E402
-from swarms.models.vllm import vLLM  # noqa: E402
+# from swarms.models.vllm import vLLM  # noqa: E402
 # from swarms.models.zephyr import Zephyr  # noqa: E402
 from swarms.models.biogpt import BioGPT  # noqa: E402
@@ -29,7 +29,7 @@ from swarms.models.layoutlm_document_qa import (
     LayoutLMDocumentQA,
 )  # noqa: E402
 from swarms.models.gpt4_vision_api import GPT4VisionAPI  # noqa: E402
+from swarms.models.openai_tts import OpenAITTS  # noqa: E402
 # from swarms.models.gpt4v import GPT4Vision
 # from swarms.models.dalle3 import Dalle3
@@ -60,5 +60,6 @@ __all__ = [
     # "Dalle3",
     # "DistilWhisperModel",
     "GPT4VisionAPI",
-    "vLLM",
+    # "vLLM",
+    "OpenAITTS",
 ]

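With the registry change above, OpenAITTS is re-exported from swarms.models while vLLM is no longer importable from the package root. A minimal sketch of the new import surface (assumes the package is installed and an OpenAI key is available in the environment):

    from swarms.models import OpenAITTS  # now re-exported via __all__

    tts = OpenAITTS()  # defaults: model_name="tts-1-1106", voice="onyx"
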
@@ -11,6 +11,14 @@ import requests
 from PIL import Image
 from termcolor import colored
 
+try:
+    import cv2
+except ImportError:
+    print(
+        "Error importing cv2 try installing it with `pip install"
+        " opencv-python`"
+    )
+
 
 class BaseMultiModalModel:
     """
@@ -126,6 +134,42 @@ class BaseMultiModalModel:
         image_pil = Image.open(img)
         return image_pil
 
+    def process_video(
+        self,
+        video_path: str = None,
+        type_img: str = ".jpg",
+        *args,
+        **kwargs,
+    ):
+        """Process a video into a list of base64-encoded frames.
+
+        Args:
+            video_path (str, optional): Path to the video file. Defaults to None.
+            type_img (str, optional): Image format used to encode each frame. Defaults to ".jpg".
+            *args: Additional positional arguments (unused).
+            **kwargs: Additional keyword arguments (unused).
+        """
+        try:
+            video = cv2.VideoCapture(video_path)
+            base64Frames = []
+            while video.isOpened():
+                success, frame = video.read()
+                if not success:
+                    break
+                _, buffer = cv2.imencode(type_img, frame)
+                base64Frames.append(
+                    base64.b64encode(buffer).decode("utf-8")
+                )
+            video.release()
+            print(len(base64Frames), "frames read")
+            return base64Frames
+        except Exception as error:
+            print(f"Error processing video {error} try again")
+            raise error
+
     def clear_chat_history(self):
         """Clear the chat history"""
         self.chat_history = []

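For context, a minimal usage sketch of the new process_video helper. Assumptions: opencv-python is installed, BaseMultiModalModel can be instantiated directly with its defaults, and "sample_clip.mp4" is a stand-in path:

    from swarms.models.base_multimodal_model import BaseMultiModalModel

    model = BaseMultiModalModel()  # assumption: no required constructor arguments
    # One base64-encoded JPEG string per frame of the (hypothetical) clip.
    frames = model.process_video(video_path="sample_clip.mp4", type_img=".jpg")
    print(len(frames), "frames encoded")
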
@@ -1,16 +1,14 @@
-import asyncio
 import base64
-import concurrent.futures
 import json
 import logging
 import os
-from concurrent.futures import ThreadPoolExecutor
-from typing import List, Optional, Tuple
+from typing import Optional
 
 import aiohttp
 import requests
 from dotenv import load_dotenv
 from termcolor import colored
+from swarms.models.base_multimodal_model import BaseMultiModalModel
 
 try:
     import cv2
@@ -32,7 +30,7 @@ You are an multi-modal autonomous agent. You are given a task and an image. You
 """
 
 
-class GPT4VisionAPI:
+class GPT4VisionAPI(BaseMultiModalModel):
     """
     GPT-4 Vision API
@@ -81,7 +79,7 @@ class GPT4VisionAPI:
         *args,
         **kwargs,
     ):
-        super().__init__()
+        super().__init__(*args, **kwargs)
         self.openai_api_key = openai_api_key
         self.logging_enabled = logging_enabled
         self.model_name = model_name
@@ -117,7 +115,7 @@ class GPT4VisionAPI:
         pass
 
     # Function to handle vision tasks
-    def run(self, img, task):
+    def run(self, img: str, task: str, *args, **kwargs):
         """Run the model."""
         try:
             base64_image = self.encode_image(img)
@@ -245,9 +243,17 @@ class GPT4VisionAPI:
         video.release()
         print(len(base64_frames), "frames read.")
+        return base64_frames
 
-        for img in base64_frames:
-            base64.b64decode(img.encode("utf-8"))
+    def run_with_video(
+        self,
+        task: str = None,
+        video: str = None,
+        *args,
+        **kwargs,
+    ):
+        self.video_prompt(self.process_video(video))
+        pass
 
     def __call__(
         self,
@@ -256,7 +262,15 @@ class GPT4VisionAPI:
         *args,
         **kwargs,
     ):
"""Run the model.""" """Call the model
Args:
task (Optional[str], optional): _description_. Defaults to None.
img (Optional[str], optional): _description_. Defaults to None.
Raises:
error: _description_
"""
         try:
             base64_image = self.encode_image(img)
             headers = {
@@ -309,35 +323,6 @@ class GPT4VisionAPI:
             print(f"Error with the request: {error}")
             raise error
 
-    def run_many(
-        self,
-        tasks: List[str],
-        imgs: List[str],
-    ):
-        """
-        Run the model on multiple tasks and images all at once using concurrent
-
-        Args:
-            tasks (List[str]): List of tasks
-            imgs (List[str]): List of image paths
-
-        Returns:
-            List[str]: List of responses
-
-        """
-        # Instantiate the thread pool executor
-        with ThreadPoolExecutor(
-            max_workers=self.max_workers
-        ) as executor:
-            results = executor.map(self.run, tasks, imgs)
-
-        # Print the results for debugging
-        for result in results:
-            print(result)
-
-        return list(results)
-
     async def arun(
         self,
         task: Optional[str] = None,
@@ -396,42 +381,6 @@ class GPT4VisionAPI:
             print(f"Error with the request {error}")
             raise error
 
-    def run_batch(
-        self, tasks_images: List[Tuple[str, str]]
-    ) -> List[str]:
-        """Process a batch of tasks and images"""
-        with concurrent.futures.ThreadPoolExecutor() as executor:
-            futures = [
-                executor.submit(self.run, task, img)
-                for task, img in tasks_images
-            ]
-            results = [future.result() for future in futures]
-        return results
-
-    async def run_batch_async(
-        self, tasks_images: List[Tuple[str, str]]
-    ) -> List[str]:
-        """Process a batch of tasks and images asynchronously"""
-        loop = asyncio.get_event_loop()
-        futures = [
-            loop.run_in_executor(None, self.run, task, img)
-            for task, img in tasks_images
-        ]
-        return await asyncio.gather(*futures)
-
-    async def run_batch_async_with_retries(
-        self, tasks_images: List[Tuple[str, str]]
-    ) -> List[str]:
-        """Process a batch of tasks and images asynchronously with retries"""
-        loop = asyncio.get_event_loop()
-        futures = [
-            loop.run_in_executor(
-                None, self.run_with_retries, task, img
-            )
-            for task, img in tasks_images
-        ]
-        return await asyncio.gather(*futures)
-
     def health_check(self):
         """Health check for the GPT4Vision model"""
         try:

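GPT4VisionAPI now subclasses BaseMultiModalModel, and the removed run_many/run_batch helpers mean callers batch by looping or by using arun themselves. A minimal single-image sketch (assumes OPENAI_API_KEY is set; the image path is a placeholder):

    import os

    from swarms.models.gpt4_vision_api import GPT4VisionAPI

    llm = GPT4VisionAPI(openai_api_key=os.getenv("OPENAI_API_KEY"))
    # Note the signature: img first, then task.
    print(llm.run(img="outfit.jpg", task="Describe the clothing in this image."))
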
@@ -0,0 +1,87 @@
+import os
+
+import openai
+import requests
+from dotenv import load_dotenv
+
+from swarms.models.base_llm import AbstractLLM
+
+# Load .env file
+load_dotenv()
+
+
+# OpenAI API Key env
+def openai_api_key_env():
+    openai_api_key = os.getenv("OPENAI_API_KEY")
+    return openai_api_key
+
+
+class OpenAITTS(AbstractLLM):
+    """OpenAI TTS model
+
+    Attributes:
+        model_name (str): Name of the TTS model to use.
+        proxy_url (str): URL of the OpenAI speech endpoint.
+        openai_api_key (str): OpenAI API key.
+        voice (str): Voice to synthesize with.
+        chunk_size (int): Chunk size used when streaming the audio response.
+
+    Methods:
+        run: Synthesize speech for a given text task and return the raw audio bytes.
+
+    Examples:
+    >>> from swarms.models.openai_tts import OpenAITTS
+    >>> tts = OpenAITTS(
+    ...     model_name="tts-1-1106",
+    ...     proxy_url="https://api.openai.com/v1/audio/speech",
+    ...     openai_api_key=openai_api_key_env(),
+    ...     voice="onyx",
+    ... )
+    >>> tts.run("Hello world")
+
+    """
+
+    def __init__(
+        self,
+        model_name: str = "tts-1-1106",
+        proxy_url: str = "https://api.openai.com/v1/audio/speech",
+        openai_api_key: str = openai_api_key_env(),
+        voice: str = "onyx",
+        chunk_size: int = 1024 * 1024,
+    ):
+        super().__init__()
+        self.model_name = model_name
+        self.proxy_url = proxy_url
+        self.openai_api_key = openai_api_key
+        self.voice = voice
+        self.chunk_size = chunk_size
+
+    def run(self, task: str, *args, **kwargs):
+        """Run the TTS model.
+
+        Args:
+            task (str): Text to convert to speech.
+
+        Returns:
+            bytes: Raw audio returned by the speech endpoint.
+        """
+        response = requests.post(
+            self.proxy_url,
+            headers={
+                "Authorization": f"Bearer {self.openai_api_key}",
+            },
+            json={
+                "model": self.model_name,
+                "input": task,
+                "voice": self.voice,
+            },
+        )
+
+        audio = b""
+        for chunk in response.iter_content(chunk_size=self.chunk_size):
+            audio += chunk
+        return audio

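A short usage sketch for the new OpenAITTS model (assumes OPENAI_API_KEY is set in the environment; the sample sentence is arbitrary):

    import os

    from swarms.models.openai_tts import OpenAITTS

    tts = OpenAITTS(voice="onyx", openai_api_key=os.getenv("OPENAI_API_KEY"))
    audio = tts.run("The swarm is healthy.")  # raw audio bytes from the speech endpoint
    print(len(audio), "bytes of audio received")
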
@@ -1,14 +1,28 @@
+"""
+Paid
+
+# TODO: Pass in abstract LLM class that can utilize Hf or Anthropic models, Move away from OPENAI
+# TODO: ADD Universal Communication Layer, an ocean vectorstore instance
+# TODO: BE MORE EXPLICIT ON TOOL USE, TASK DECOMPOSITION AND TASK COMPLETION AND ALLOCATION
+# TODO: Add RLHF Data collection, ask user how the swarm is performing
+# TODO: Create an onboarding process if settings are not preconfigured like `from swarms import Swarm, Swarm()` => then initiate onboarding, name your swarm + provide purpose + etc
+"""
 import asyncio
 import concurrent.futures
+import logging
 import time
 from abc import ABC, abstractmethod
-from typing import Any, Dict, List, Optional, Callable
-from swarms.structs.agent import Agent
-from swarms.agents.base import AbstractWorker
 from concurrent.futures import ThreadPoolExecutor, as_completed
-import logging
+from typing import Any, Callable, Dict, List, Optional
 
 from termcolor import colored
 
+from swarms.structs.agent import Agent
+
 
 class AbstractSwarm(ABC):
     """
@@ -23,23 +37,23 @@ class AbstractSwarm(ABC):
     communicate: Communicate with the swarm through the orchestrator, protocols, and the universal communication layer
     run: Run the swarm
     step: Step the swarm
-    add_worker: Add a worker to the swarm
-    remove_worker: Remove a worker from the swarm
+    add_agent: Add an agent to the swarm
+    remove_agent: Remove an agent from the swarm
     broadcast: Broadcast a message to all agents
     reset: Reset the swarm
     plan: agents must individually plan using a workflow or pipeline
-    direct_message: Send a direct message to a worker
+    direct_message: Send a direct message to an agent
     autoscaler: Autoscaler that acts like kubernetes for autonomous agents
-    get_worker_by_id: Locate a worker by id
-    get_worker_by_name: Locate a worker by name
-    assign_task: Assign a task to a worker
+    get_agent_by_id: Locate an agent by id
+    get_agent_by_name: Locate an agent by name
+    assign_task: Assign a task to an agent
     get_all_tasks: Get all tasks
     get_finished_tasks: Get all finished tasks
     get_pending_tasks: Get all pending tasks
-    pause_worker: Pause a worker
-    resume_worker: Resume a worker
-    stop_worker: Stop a worker
-    restart_worker: Restart worker
+    pause_agent: Pause an agent
+    resume_agent: Resume an agent
+    stop_agent: Stop an agent
+    restart_agent: Restart an agent
     scale_up: Scale up the number of agents
     scale_down: Scale down the number of agents
     scale_to: Scale to a specific number of agents
@@ -57,12 +71,6 @@ class AbstractSwarm(ABC):
     """
 
-    # TODO: Pass in abstract LLM class that can utilize Hf or Anthropic models, Move away from OPENAI
-    # TODO: ADD Universal Communication Layer, a ocean vectorstore instance
-    # TODO: BE MORE EXPLICIT ON TOOL USE, TASK DECOMPOSITION AND TASK COMPLETETION AND ALLOCATION
-    # TODO: Add RLHF Data collection, ask user how the swarm is performing
-    # TODO: Create an onboarding process if not settings are preconfigured like `from swarms import Swarm, Swarm()` => then initiate onboarding name your swarm + provide purpose + etc
-
     # @abstractmethod
     def __init__(self, agents: List[Agent], max_loops: int = 200):
         """Initialize the swarm with agents"""
@@ -101,18 +109,18 @@ class AbstractSwarm(ABC):
         pass
 
     # @abstractmethod
-    def add_worker(self, worker: "AbstractWorker"):
-        """Add a worker to the swarm"""
+    def add_agent(self, agent: "Agent"):
+        """Add an agent to the swarm"""
         pass
 
     # @abstractmethod
-    def remove_worker(self, worker: "AbstractWorker"):
-        """Remove a worker from the swarm"""
+    def remove_agent(self, agent: "Agent"):
+        """Remove an agent from the swarm"""
         pass
 
     # @abstractmethod
     def broadcast(
-        self, message: str, sender: Optional["AbstractWorker"] = None
+        self, message: str, sender: Optional["Agent"] = None
     ):
         """Broadcast a message to all agents"""
         pass
@@ -131,36 +139,36 @@ class AbstractSwarm(ABC):
     def direct_message(
         self,
         message: str,
-        sender: "AbstractWorker",
-        recipient: "AbstractWorker",
+        sender: "Agent",
+        recipient: "Agent",
     ):
-        """Send a direct message to a worker"""
+        """Send a direct message to an agent"""
         pass
 
     # @abstractmethod
-    def autoscaler(self, num_agents: int, worker: ["AbstractWorker"]):
+    def autoscaler(self, num_agents: int, agent: List["Agent"]):
         """Autoscaler that acts like kubernetes for autonomous agents"""
         pass
 
     # @abstractmethod
-    def get_worker_by_id(self, id: str) -> "AbstractWorker":
-        """Locate a worker by id"""
+    def get_agent_by_id(self, id: str) -> "Agent":
+        """Locate an agent by id"""
        pass
 
     # @abstractmethod
-    def get_worker_by_name(self, name: str) -> "AbstractWorker":
-        """Locate a worker by name"""
+    def get_agent_by_name(self, name: str) -> "Agent":
+        """Locate an agent by name"""
         pass
 
     # @abstractmethod
     def assign_task(
-        self, worker: "AbstractWorker", task: Any
+        self, agent: "Agent", task: Any
     ) -> Dict:
-        """Assign a task to a worker"""
+        """Assign a task to an agent"""
         pass
 
     # @abstractmethod
-    def get_all_tasks(self, worker: "AbstractWorker", task: Any):
+    def get_all_tasks(self, agent: "Agent", task: Any):
         """Get all tasks"""
 
     # @abstractmethod
@@ -174,42 +182,42 @@ class AbstractSwarm(ABC):
         pass
 
     # @abstractmethod
-    def pause_worker(self, worker: "AbstractWorker", worker_id: str):
-        """Pause a worker"""
+    def pause_agent(self, agent: "Agent", agent_id: str):
+        """Pause an agent"""
         pass
 
     # @abstractmethod
-    def resume_worker(self, worker: "AbstractWorker", worker_id: str):
-        """Resume a worker"""
+    def resume_agent(self, agent: "Agent", agent_id: str):
+        """Resume an agent"""
         pass
 
     # @abstractmethod
-    def stop_worker(self, worker: "AbstractWorker", worker_id: str):
-        """Stop a worker"""
+    def stop_agent(self, agent: "Agent", agent_id: str):
+        """Stop an agent"""
         pass
 
     # @abstractmethod
-    def restart_worker(self, worker: "AbstractWorker"):
-        """Restart worker"""
+    def restart_agent(self, agent: "Agent"):
+        """Restart an agent"""
         pass
 
     # @abstractmethod
-    def scale_up(self, num_worker: int):
+    def scale_up(self, num_agent: int):
         """Scale up the number of agents"""
         pass
 
     # @abstractmethod
-    def scale_down(self, num_worker: int):
+    def scale_down(self, num_agent: int):
         """Scale down the number of agents"""
         pass
 
     # @abstractmethod
-    def scale_to(self, num_worker: int):
+    def scale_to(self, num_agent: int):
         """Scale to a specific number of agents"""
         pass
 
     # @abstractmethod
-    def get_all_agents(self) -> List["AbstractWorker"]:
+    def get_all_agents(self) -> List["Agent"]:
         """Get all agents"""
         pass

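The renames above change the subclassing surface from *_worker to *_agent hooks. A minimal concrete-swarm sketch against the new names; the module path swarms.structs.base_swarm and the self.agents attribute are assumptions for illustration:

    from typing import List

    from swarms.structs.agent import Agent
    from swarms.structs.base_swarm import AbstractSwarm  # assumed module path


    class MiniSwarm(AbstractSwarm):
        """Minimal sketch using the renamed agent-centric hooks."""

        def __init__(self, agents: List[Agent], max_loops: int = 200):
            super().__init__(agents, max_loops)
            self.agents = list(agents)  # assumption: keep a local roster

        def add_agent(self, agent: "Agent"):
            self.agents.append(agent)

        def remove_agent(self, agent: "Agent"):
            self.agents.remove(agent)
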
@@ -0,0 +1,79 @@
+import pytest
+from unittest.mock import patch, MagicMock
+
+from swarms.models.openai_tts import OpenAITTS
+
+
+def test_openaitts_initialization():
+    tts = OpenAITTS()
+    assert isinstance(tts, OpenAITTS)
+    assert tts.model_name == "tts-1-1106"
+    assert tts.proxy_url == "https://api.openai.com/v1/audio/speech"
+    assert tts.voice == "onyx"
+    assert tts.chunk_size == 1024 * 1024
+
+
+def test_openaitts_initialization_custom_parameters():
+    tts = OpenAITTS(
+        "custom_model", "custom_url", "custom_key", "custom_voice", 2048
+    )
+    assert tts.model_name == "custom_model"
+    assert tts.proxy_url == "custom_url"
+    assert tts.openai_api_key == "custom_key"
+    assert tts.voice == "custom_voice"
+    assert tts.chunk_size == 2048
+
+
+@patch("requests.post")
+def test_run(mock_post):
+    mock_response = MagicMock()
+    mock_response.iter_content.return_value = [b"chunk1", b"chunk2"]
+    mock_post.return_value = mock_response
+    tts = OpenAITTS()
+    audio = tts.run("Hello world")
+    assert audio == b"chunk1chunk2"
+    mock_post.assert_called_once_with(
+        "https://api.openai.com/v1/audio/speech",
+        headers={"Authorization": f"Bearer {tts.openai_api_key}"},
+        json={"model": "tts-1-1106", "input": "Hello world", "voice": "onyx"},
+    )
+
+
+@patch("requests.post")
+def test_run_empty_task(mock_post):
+    tts = OpenAITTS()
+    with pytest.raises(Exception):
+        tts.run("")
+
+
+@patch("requests.post")
+def test_run_very_long_task(mock_post):
+    tts = OpenAITTS()
+    with pytest.raises(Exception):
+        tts.run("A" * 10000)
+
+
+@patch("requests.post")
+def test_run_invalid_task(mock_post):
+    tts = OpenAITTS()
+    with pytest.raises(Exception):
+        tts.run(None)
+
+
+@patch("requests.post")
+def test_run_custom_model(mock_post):
+    mock_response = MagicMock()
+    mock_response.iter_content.return_value = [b"chunk1", b"chunk2"]
+    mock_post.return_value = mock_response
+    tts = OpenAITTS("custom_model")
+    audio = tts.run("Hello world")
+    assert audio == b"chunk1chunk2"
+    mock_post.assert_called_once_with(
+        "https://api.openai.com/v1/audio/speech",
+        headers={"Authorization": f"Bearer {tts.openai_api_key}"},
+        json={"model": "custom_model", "input": "Hello world", "voice": "onyx"},
+    )
+
+
+@patch("requests.post")
+def test_run_custom_voice(mock_post):
+    mock_response = MagicMock()
+    mock_response.iter_content.return_value = [b"chunk1", b"chunk2"]
+    mock_post.return_value = mock_response
+    tts = OpenAITTS(voice="custom_voice")
+    audio = tts.run("Hello world")
+    assert audio == b"chunk1chunk2"
+    mock_post.assert_called_once_with(
+        "https://api.openai.com/v1/audio/speech",
+        headers={"Authorization": f"Bearer {tts.openai_api_key}"},
+        json={"model": "tts-1-1106", "input": "Hello world", "voice": "custom_voice"},
+    )

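The mocked-response setup repeats across the tests above; an optional refactor sketch (not part of this commit) shares it through a pytest fixture:

    import pytest
    from unittest.mock import MagicMock, patch

    from swarms.models.openai_tts import OpenAITTS


    @pytest.fixture
    def mock_speech_post():
        with patch("requests.post") as mock_post:
            response = MagicMock()
            response.iter_content.return_value = [b"chunk1", b"chunk2"]
            mock_post.return_value = response
            yield mock_post


    def test_run_with_fixture(mock_speech_post):
        assert OpenAITTS().run("Hello world") == b"chunk1chunk2"
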
@@ -0,0 +1,10 @@
+import os
+
+from swarms import OpenAITTS
+
+tts = OpenAITTS(
+    model_name="tts-1-1106",
+    voice="onyx",
+    openai_api_key=os.getenv("OPENAI_API_KEY"),  # read the key from the environment rather than hard-coding it
+)
+
+out = tts("Hello world")
+print(out)
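
Assuming AbstractLLM routes __call__ to run, out holds raw audio bytes, so the script can be extended to persist them (filename arbitrary; OpenAI's speech endpoint returns MP3 by default):

    with open("tts_output.mp3", "wb") as f:
        f.write(out)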