[OpenAITTS][Save to filepath] [FEAT][BaseTTSModel]

2 years ago · ec580fa60e
parent 01501ac2ad
commit ec580fa60e
6 changed files with 131 additions and 32 deletions
--- a/playground/models/tts_speech.py
+++ b/playground/models/tts_speech.py
@ -1,10 +1,10 @@
 from swarms import OpenAITTS

 tts = OpenAITTS(
-    model_name = "tts-1-1106",
-    voice = "onyx",
-    openai_api_key="sk"
+    model_name="tts-1-1106",
+    voice="onyx",
+    openai_api_key="YOUR_API_KEY",
 )

-out = tts("Hello world")
+out = tts.run_and_save("pliny is a girl and a chicken")
 print(out)
--- a/swarms/models/init.py
+++ b/swarms/models/init.py
@ -8,6 +8,7 @@ from swarms.models.openai_models import (
    AzureOpenAI,
    OpenAIChat,
 )  # noqa: E402
+
 # from swarms.models.vllm import vLLM  # noqa: E402

 # from swarms.models.zephyr import Zephyr  # noqa: E402
--- a/swarms/models/base_tts.py
+++ b/swarms/models/base_tts.py
@ -0,0 +1,41 @@
+import wave
+from typing import Optional
+from swarms.models.base_llm import AbstractLLM
+from abc import ABC, abstractmethod
+
+
+class BaseTTSModel(AbstractLLM):
+    def __init__(
+        self,
+        model_name,
+        voice,
+        chunk_size,
+        save_to_file: bool = False,
+        saved_filepath: Optional[str] = None,
+    ):
+        self.model_name = model_name
+        self.voice = voice
+        self.chunk_size = chunk_size
+
+    def save(self, filepath: Optional[str] = None):
+        pass
+
+    def load(self, filepath: Optional[str] = None):
+        pass
+
+    @abstractmethod
+    def run(self, task: str, *args, **kwargs):
+        pass
+
+    def save_to_file(self, speech_data, filename):
+        """Save the speech data to a file.
+
+        Args:
+            speech_data (bytes): The speech data.
+            filename (str): The path to the file where the speech will be saved.
+        """
+        with wave.open(filename, "wb") as file:
+            file.setnchannels(1)
+            file.setsampwidth(2)
+            file.setframerate(22050)
+            file.writeframes(speech_data)
--- a/swarms/models/openai_tts.py
+++ b/swarms/models/openai_tts.py
@ -1,19 +1,29 @@
 import os
-
+import sys
 import openai
 import requests
 from dotenv import load_dotenv
-
+import subprocess
 from swarms.models.base_llm import AbstractLLM

+try:
+    import wave
+except ImportError as error:
+    print(f"Import Error: {error} - Please install pyaudio")
+    subprocess.check_call(
+        [sys.executable, "-m", "pip", "install", "pyaudio"]
+    )
+
+
 # Load .env file
 load_dotenv()

+
 # OpenAI API Key env
 def openai_api_key_env():
    openai_api_key = os.getenv("OPENAI_API_KEY")
    return openai_api_key
-    
+

 class OpenAITTS(AbstractLLM):
    """OpenAI TTS model
@ -24,11 +34,11 @@ class OpenAITTS(AbstractLLM):
        openai_api_key (str): _description_
        voice (str): _description_
        chunk_size (_type_): _description_
-        
+
    Methods:
        run: _description_
-        
-        
+
+
    Examples:
    >>> from swarms.models.openai_tts import OpenAITTS
    >>> tts = OpenAITTS(
@ -38,15 +48,18 @@ class OpenAITTS(AbstractLLM):
    ...     voice = "onyx",
    ... )
    >>> tts.run("Hello world")
-    
+
    """
+
    def __init__(
        self,
        model_name: str = "tts-1-1106",
        proxy_url: str = "https://api.openai.com/v1/audio/speech",
        openai_api_key: str = openai_api_key_env,
        voice: str = "onyx",
-        chunk_size = 1024 * 1024,
+        chunk_size=1024 * 1024,
+        autosave: bool = False,
+        saved_filepath: str = None,
    ):
        super().__init__()
        self.model_name = model_name
@ -54,13 +67,12 @@ class OpenAITTS(AbstractLLM):
        self.openai_api_key = openai_api_key
        self.voice = voice
        self.chunk_size = chunk_size
-        
-    def run(
-        self,
-        task: str,
-        *args,
-        **kwargs
-    ):
+        self.autosave = autosave
+        self.saved_filepath = saved_filepath
+
+        self.saved_filepath = "runs/tts_speech.wav"
+
+    def run(self, task: str, *args, **kwargs):
        """Run the tts model

        Args:
@ -71,7 +83,7 @@ class OpenAITTS(AbstractLLM):
        """
        response = requests.post(
            self.proxy_url,
-            headers = {
+            headers={
                "Authorization": f"Bearer {self.openai_api_key}",
            },
            json={
@ -80,8 +92,30 @@ class OpenAITTS(AbstractLLM):
                "voice": self.voice,
            },
        )
-        
+
        audio = b""
-        for chunk in response.iter_content(chunk_size = 1024 * 1024):
+        for chunk in response.iter_content(chunk_size=1024 * 1024):
            audio += chunk
-        return audio
+        return audio
+
+    def run_and_save(self, task: str = None, *args, **kwargs):
+        """Run the TTS model and save the output to a file.
+
+        Args:
+            task (str): The text to be converted to speech.
+            filename (str): The path to the file where the speech will be saved.
+
+        Returns:
+            bytes: The speech data.
+        """
+        # Run the TTS model.
+        speech_data = self.run(task)
+
+        # Save the speech data to a file.
+        with wave.open(self.saved_filepath, "wb") as file:
+            file.setnchannels(1)
+            file.setsampwidth(2)
+            file.setframerate(22050)
+            file.writeframes(speech_data)
+
+        return speech_data
--- a/swarms/swarms/base.py
+++ b/swarms/swarms/base.py
@ -23,7 +23,6 @@ from termcolor import colored
 from swarms.structs.agent import Agent


-
 class AbstractSwarm(ABC):
    """
    Abstract Swarm Class for multi-agent systems
@ -161,9 +160,7 @@ class AbstractSwarm(ABC):
        pass

    # @abstractmethod
-    def assign_task(
-        self, agent: "Agent", task: Any
-    ) -> Dict:
+    def assign_task(self, agent: "Agent", task: Any) -> Dict:
        """Assign a task to a agent"""
        pass

--- a/tests/models/test_openaitts.py
+++ b/tests/models/test_openaitts.py
@ -2,6 +2,7 @@ import pytest
 from unittest.mock import patch, MagicMock
 from swarms.models.openai_tts import OpenAITTS

+
 def test_openaitts_initialization():
    tts = OpenAITTS()
    assert isinstance(tts, OpenAITTS)
@ -10,14 +11,22 @@ def test_openaitts_initialization():
    assert tts.voice == "onyx"
    assert tts.chunk_size == 1024 * 1024

+
 def test_openaitts_initialization_custom_parameters():
-    tts = OpenAITTS("custom_model", "custom_url", "custom_key", "custom_voice", 2048)
+    tts = OpenAITTS(
+        "custom_model",
+        "custom_url",
+        "custom_key",
+        "custom_voice",
+        2048,
+    )
    assert tts.model_name == "custom_model"
    assert tts.proxy_url == "custom_url"
    assert tts.openai_api_key == "custom_key"
    assert tts.voice == "custom_voice"
    assert tts.chunk_size == 2048

+
@patch("requests.post")
 def test_run(mock_post):
    mock_response = MagicMock()
@ -29,27 +38,35 @@ def test_run(mock_post):
    mock_post.assert_called_once_with(
        "https://api.openai.com/v1/audio/speech",
        headers={"Authorization": f"Bearer {tts.openai_api_key}"},
-        json={"model": "tts-1-1106", "input": "Hello world", "voice": "onyx"},
+        json={
+            "model": "tts-1-1106",
+            "input": "Hello world",
+            "voice": "onyx",
+        },
    )

+
@patch("requests.post")
 def test_run_empty_task(mock_post):
    tts = OpenAITTS()
    with pytest.raises(Exception):
        tts.run("")

+
@patch("requests.post")
 def test_run_very_long_task(mock_post):
    tts = OpenAITTS()
    with pytest.raises(Exception):
        tts.run("A" * 10000)

+
@patch("requests.post")
 def test_run_invalid_task(mock_post):
    tts = OpenAITTS()
    with pytest.raises(Exception):
        tts.run(None)

+
@patch("requests.post")
 def test_run_custom_model(mock_post):
    mock_response = MagicMock()
@ -61,9 +78,14 @@ def test_run_custom_model(mock_post):
    mock_post.assert_called_once_with(
        "https://api.openai.com/v1/audio/speech",
        headers={"Authorization": f"Bearer {tts.openai_api_key}"},
-        json={"model": "custom_model", "input": "Hello world", "voice": "onyx"},
+        json={
+            "model": "custom_model",
+            "input": "Hello world",
+            "voice": "onyx",
+        },
    )

+
@patch("requests.post")
 def test_run_custom_voice(mock_post):
    mock_response = MagicMock()
@ -75,5 +97,9 @@ def test_run_custom_voice(mock_post):
    mock_post.assert_called_once_with(
        "https://api.openai.com/v1/audio/speech",
        headers={"Authorization": f"Bearer {tts.openai_api_key}"},
-        json={"model": "tts-1-1106", "input": "Hello world", "voice": "custom_voice"},
-    )
+        json={
+            "model": "tts-1-1106",
+            "input": "Hello world",
+            "voice": "custom_voice",
+        },
+    )