# swarms/tests/models/test_speech_t5.py
# Unit tests for the SpeechT5 wrapper imported from swarms.models.speecht5.

import pytest
import os
import torch
from swarms.models.speecht5 import SpeechT5
# Create fixtures if needed
@pytest.fixture
def speecht5_model():
    """Provide a ``SpeechT5`` instance constructed with its default arguments."""
    model = SpeechT5()
    return model
# Test cases for the SpeechT5 class
def test_speecht5_init(speecht5_model):
    """Check the types of the components exposed by a constructed instance."""
    # NOTE(review): the expected types are read off the ``SpeechT5`` class
    # itself (``SpeechT5.<attr>.__class__``). Confirm these names exist as
    # class-level attributes; otherwise the lookup raises AttributeError.
    for component in ("processor", "model", "vocoder"):
        actual = getattr(speecht5_model, component)
        expected_type = getattr(SpeechT5, component).__class__
        assert isinstance(actual, expected_type)

    dataset = speecht5_model.embeddings_dataset
    assert isinstance(dataset, torch.utils.data.Dataset)
def test_speecht5_call(speecht5_model):
    """Calling the model with a text prompt should yield a ``torch.Tensor``."""
    result = speecht5_model("Hello, how are you?")
    assert isinstance(result, torch.Tensor)
def test_speecht5_save_speech(speecht5_model):
    """``save_speech`` should create the requested file on disk.

    The file is written inside a temporary directory, so the working
    directory is never touched and the file is removed even when the
    assertion fails.
    """
    import tempfile  # local import: only needed for this test's scratch dir

    text = "Hello, how are you?"
    speech = speecht5_model(text)
    with tempfile.TemporaryDirectory() as scratch_dir:
        filename = os.path.join(scratch_dir, "test_speech.wav")
        speecht5_model.save_speech(speech, filename)
        assert os.path.isfile(filename)
def test_speecht5_set_model(speecht5_model):
    """``set_model`` should update the model name visible on the instance.

    The original model is restored in a ``finally`` clause so the instance
    is reset even when one of the assertions fails.
    """
    old_model_name = speecht5_model.model_name
    new_model_name = "facebook/speecht5-tts"
    try:
        speecht5_model.set_model(new_model_name)
        assert speecht5_model.model_name == new_model_name
        assert speecht5_model.processor.model_name == new_model_name
        assert (
            speecht5_model.model.config.model_name_or_path
            == new_model_name
        )
    finally:
        # Restore original model
        speecht5_model.set_model(old_model_name)
def test_speecht5_set_vocoder(speecht5_model):
    """``set_vocoder`` should update the vocoder name visible on the instance.

    The original vocoder is restored in a ``finally`` clause so the instance
    is reset even when one of the assertions fails.
    """
    old_vocoder_name = speecht5_model.vocoder_name
    new_vocoder_name = "facebook/speecht5-hifigan"
    try:
        speecht5_model.set_vocoder(new_vocoder_name)
        assert speecht5_model.vocoder_name == new_vocoder_name
        assert (
            speecht5_model.vocoder.config.model_name_or_path
            == new_vocoder_name
        )
    finally:
        # Restore original vocoder
        speecht5_model.set_vocoder(old_vocoder_name)
def test_speecht5_set_embeddings_dataset(speecht5_model):
    """``set_embeddings_dataset`` should update the dataset name and dataset.

    The original dataset is restored in a ``finally`` clause so the instance
    is reset even when one of the assertions fails.
    """
    old_dataset_name = speecht5_model.dataset_name
    new_dataset_name = "Matthijs/cmu-arctic-xvectors-test"
    try:
        speecht5_model.set_embeddings_dataset(new_dataset_name)
        assert speecht5_model.dataset_name == new_dataset_name
        assert isinstance(
            speecht5_model.embeddings_dataset, torch.utils.data.Dataset
        )
    finally:
        # Restore original dataset
        speecht5_model.set_embeddings_dataset(old_dataset_name)
def test_speecht5_get_sampling_rate(speecht5_model):
    """The sampling rate reported by the model should be 16000."""
    expected_rate = 16000
    assert speecht5_model.get_sampling_rate() == expected_rate
def test_speecht5_print_model_details(speecht5_model, capsys):
    """``print_model_details`` should write both name labels to stdout."""
    speecht5_model.print_model_details()
    stdout_text = capsys.readouterr().out
    for label in ("Model Name: ", "Vocoder Name: "):
        assert label in stdout_text
def test_speecht5_quick_synthesize(speecht5_model):
    """``quick_synthesize`` should return a list whose first item is a dict with an "audio" key."""
    result = speecht5_model.quick_synthesize("Hello, how are you?")
    assert isinstance(result, list)

    first_item = result[0]
    assert isinstance(first_item, dict)
    assert "audio" in first_item
def test_speecht5_change_dataset_split(speecht5_model):
    """After ``change_dataset_split`` the embeddings dataset should report the requested split."""
    requested_split = "test"
    speecht5_model.change_dataset_split(requested_split)

    dataset = speecht5_model.embeddings_dataset
    assert dataset.split == requested_split
def test_speecht5_load_custom_embedding(speecht5_model):
    """``load_custom_embedding`` should return the x-vector unsqueezed at dim 0.

    The result is compared against ``torch.tensor(xvector).unsqueeze(0)``
    for both shape and values.
    """
    xvector = [0.1, 0.2, 0.3, 0.4, 0.5]
    expected = torch.tensor(xvector).unsqueeze(0)
    embedding = speecht5_model.load_custom_embedding(xvector)
    # torch.equal requires identical shape as well as identical values. The
    # element-wise torch.eq broadcasts, so a result missing the leading
    # dimension would still have compared equal.
    assert torch.equal(embedding, expected)
def test_speecht5_with_different_speakers(speecht5_model):
    """Synthesis should return a ``torch.Tensor`` for each listed speaker id."""
    prompt = "Hello, how are you?"
    for candidate_id in (7306, 5324, 1234):
        output = speecht5_model(prompt, speaker_id=candidate_id)
        assert isinstance(output, torch.Tensor)
def test_speecht5_save_speech_with_different_extensions(
    speecht5_model,
):
    """``save_speech`` should create a file for each tested extension.

    Files are written inside a temporary directory, so the working
    directory is never touched and every file is removed even when an
    assertion fails part-way through the loop.
    """
    import tempfile  # local import: only needed for this test's scratch dir

    text = "Hello, how are you?"
    speech = speecht5_model(text)
    extensions = [".wav", ".flac"]
    with tempfile.TemporaryDirectory() as scratch_dir:
        for extension in extensions:
            filename = os.path.join(scratch_dir, f"test_speech{extension}")
            speecht5_model.save_speech(speech, filename)
            assert os.path.isfile(filename)
def test_speecht5_invalid_speaker_id(speecht5_model):
    """Requesting speaker id 9999 is expected to raise ``IndexError``."""
    # Speaker ID that does not exist in the dataset
    missing_speaker = 9999
    with pytest.raises(IndexError):
        speecht5_model("Hello, how are you?", speaker_id=missing_speaker)
def test_speecht5_invalid_save_path(speecht5_model):
    """Saving under "/invalid_directory" is expected to raise ``FileNotFoundError``."""
    audio = speecht5_model("Hello, how are you?")
    bad_path = "/invalid_directory/test_speech.wav"
    with pytest.raises(FileNotFoundError):
        speecht5_model.save_speech(audio, bad_path)
def test_speecht5_change_vocoder_model(speecht5_model):
    """Synthesis should still return a ``torch.Tensor`` after switching vocoder.

    The original vocoder is restored in a ``finally`` clause so the instance
    is reset even when synthesis or the assertion fails.
    """
    text = "Hello, how are you?"
    old_vocoder_name = speecht5_model.vocoder_name
    new_vocoder_name = "facebook/speecht5-hifigan-ljspeech"
    try:
        speecht5_model.set_vocoder(new_vocoder_name)
        speech = speecht5_model(text)
        assert isinstance(speech, torch.Tensor)
    finally:
        # Restore original vocoder
        speecht5_model.set_vocoder(old_vocoder_name)