You cannot select more than 25 topics.
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
114 lines
3.6 KiB
114 lines
3.6 KiB
import tempfile
|
|
from enum import Enum
|
|
from typing import Any, Dict, Union
|
|
|
|
from langchain.utils import get_from_dict_or_env
|
|
from pydantic import root_validator
|
|
|
|
from swarms.tools.tool import BaseTool
|
|
|
|
|
|
def _import_elevenlabs() -> Any:
    """Import the optional ``elevenlabs`` package on demand and return it.

    The import happens at call time rather than at module load, so this
    file can be imported even when ``elevenlabs`` is not installed.

    Returns:
        The imported ``elevenlabs`` module object.

    Raises:
        ImportError: If ``elevenlabs`` cannot be imported. The message
            contains an installation hint and the original import failure
            is chained as the cause.
    """
    try:
        import elevenlabs
    except ImportError as exc:
        install_hint = (
            "Cannot import elevenlabs, please install `pip install elevenlabs`."
        )
        raise ImportError(install_hint) from exc
    else:
        return elevenlabs
|
|
|
|
|
|
class ElevenLabsModel(str, Enum):
    """Model identifiers accepted by the Eleven Labs Text2Speech tool.

    The enum mixes in ``str``, so every member is itself a string that
    compares equal to its raw identifier value.
    """

    # Multilingual model identifier.
    MULTI_LINGUAL = "eleven_multilingual_v1"

    # Monolingual model identifier.
    MONO_LINGUAL = "eleven_monolingual_v1"
|
|
|
|
|
|
class ElevenLabsText2SpeechTool(BaseTool):
    """Tool that queries the Eleven Labs Text2Speech API.

    In order to set this up, follow instructions at:
    https://docs.elevenlabs.io/welcome/introduction

    An API key must be available either as the ``eleven_api_key`` value
    passed to the constructor or as the ``ELEVEN_API_KEY`` environment
    variable; this is checked when the tool is instantiated.

    Attributes:
        model (ElevenLabsModel): The model to use for text to speech.
            Defaults to ElevenLabsModel.MULTI_LINGUAL.
        name (str): The name of the tool. Defaults to "eleven_labs_text2speech".
        description (str): The description of the tool.
            Defaults to "A wrapper around Eleven Labs Text2Speech. Useful
            for when you need to convert text to speech. It supports
            multiple languages, including English, German, Polish, Spanish,
            Italian, French, Portuguese, and Hindi."

    Usage:
        >>> from swarms.models import ElevenLabsText2SpeechTool
        >>> tts = ElevenLabsText2SpeechTool()
        >>> speech_file = tts.run("Hello world!")
        >>> tts.play(speech_file)
        >>> tts.stream_speech("Hello world!")
    """

    model: Union[ElevenLabsModel, str] = ElevenLabsModel.MULTI_LINGUAL

    name: str = "eleven_labs_text2speech"
    description: str = (
        "A wrapper around Eleven Labs Text2Speech. Useful for when"
        " you need to convert text to speech. It supports multiple"
        " languages, including English, German, Polish, Spanish,"
        " Italian, French, Portuguese, and Hindi. "
    )

    @root_validator(pre=True)
    def validate_environment(cls, values: Dict) -> Dict:
        """Validate that api key exists in environment.

        Looks up ``eleven_api_key`` in ``values`` or ``ELEVEN_API_KEY`` in
        the environment. The looked-up value is intentionally discarded:
        this validator only checks presence and returns ``values``
        unchanged.

        Args:
            values: Raw field values supplied to the model constructor.

        Returns:
            The same ``values`` mapping, unmodified.
        """
        # NOTE(review): the helper is expected to raise when the key is
        # found in neither place -- confirm against langchain's
        # ``get_from_dict_or_env``.
        _ = get_from_dict_or_env(
            values, "eleven_api_key", "ELEVEN_API_KEY"
        )

        return values

    def _run(
        self,
        task: str,
    ) -> str:
        """Convert ``task`` to speech and write it to a temporary file.

        Args:
            task: The text to synthesize.

        Returns:
            Path of the temporary file holding the generated audio. The
            file is created with ``delete=False``, so it persists after
            this call and the caller is responsible for removing it.

        Raises:
            ImportError: If the ``elevenlabs`` package is not installed.
            RuntimeError: If generation or writing the file fails; the
                underlying exception is chained as ``__cause__``.
        """
        elevenlabs = _import_elevenlabs()
        try:
            speech = elevenlabs.generate(text=task, model=self.model)
            # NOTE(review): the suffix is always ".wav" regardless of the
            # audio encoding returned by ``generate`` -- confirm.
            with tempfile.NamedTemporaryFile(
                mode="bx", suffix=".wav", delete=False
            ) as f:
                f.write(speech)
            return f.name
        except Exception as e:
            # Chain explicitly so the original failure is preserved as the
            # direct cause, consistent with ``_import_elevenlabs``.
            raise RuntimeError(
                f"Error while running ElevenLabsText2SpeechTool: {e}"
            ) from e

    def play(self, speech_file: str) -> None:
        """Play a previously generated speech file.

        Args:
            speech_file: Path to an audio file, e.g. the value returned by
                running this tool.

        Raises:
            ImportError: If the ``elevenlabs`` package is not installed.
        """
        elevenlabs = _import_elevenlabs()
        with open(speech_file, mode="rb") as f:
            speech = f.read()

        elevenlabs.play(speech)

    def stream_speech(self, query: str) -> None:
        """Stream the text as speech as it is generated.

        Play the text in your speakers.

        Args:
            query: The text to synthesize and stream.

        Raises:
            ImportError: If the ``elevenlabs`` package is not installed.
        """
        elevenlabs = _import_elevenlabs()
        speech_stream = elevenlabs.generate(
            text=query, model=self.model, stream=True
        )
        elevenlabs.stream(speech_stream)

    def save(self, speech_file: str, path: str) -> None:
        """Save the speech file to a path.

        Not supported by this tool.

        Raises:
            NotImplementedError: Always.
        """
        raise NotImplementedError(
            "Saving not implemented for this tool."
        )

    def __str__(self) -> str:
        """Return the fixed display name of this tool."""
        return "ElevenLabsText2SpeechTool"
|