You can not select more than 25 topics
Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
140 lines
4.8 KiB
140 lines
4.8 KiB
1 year ago
|
import pytest
|
||
|
import os
|
||
|
import torch
|
||
|
from swarms.models.speecht5 import SpeechT5
|
||
|
|
||
|
|
||
|
# Create fixtures if needed
|
||
|
@pytest.fixture
def speecht5_model():
    """Provide a SpeechT5 instance built with its default arguments.

    The fixture uses pytest's default (function) scope, so every test that
    requests it receives a newly constructed instance.
    """
    instance = SpeechT5()
    return instance
|
||
|
|
||
|
|
||
|
# Test cases for the SpeechT5 class
|
||
|
|
||
|
|
||
|
def test_speecht5_init(speecht5_model):
    """Check the types of the components exposed by a fresh SpeechT5."""
    # NOTE(review): the expected type is taken from the attribute looked up on
    # the SpeechT5 *class* itself (``SpeechT5.<name>.__class__``). Confirm the
    # class really defines these names at class level; otherwise the lookup
    # raises AttributeError before anything is compared.
    for component in ("processor", "model", "vocoder"):
        assert isinstance(
            getattr(speecht5_model, component),
            getattr(SpeechT5, component).__class__,
        )

    # NOTE(review): presumably the embeddings dataset is expected to be a
    # torch Dataset subclass; verify against what SpeechT5 actually loads.
    dataset = speecht5_model.embeddings_dataset
    assert isinstance(dataset, torch.utils.data.Dataset)
|
||
|
|
||
|
|
||
|
def test_speecht5_call(speecht5_model):
    """Check that calling the model with a sentence yields a torch.Tensor."""
    waveform = speecht5_model("Hello, how are you?")
    assert isinstance(waveform, torch.Tensor)
|
||
|
|
||
|
|
||
|
def test_speecht5_save_speech(speecht5_model, tmp_path):
    """Check that save_speech writes the synthesized audio to a file.

    Args:
        speecht5_model: SpeechT5 instance supplied by the fixture.
        tmp_path: pytest's built-in per-test temporary directory.
    """
    text = "Hello, how are you?"
    speech = speecht5_model(text)

    # Write inside pytest's temporary directory rather than the current
    # working directory: nothing is left behind when an assertion fails, and
    # the file cannot collide with other tests that use the same file name.
    # The path is passed as a plain string, as in the original call.
    filename = str(tmp_path / "test_speech.wav")

    speecht5_model.save_speech(speech, filename)
    assert os.path.isfile(filename)
|
||
|
|
||
|
|
||
|
def test_speecht5_set_model(speecht5_model):
    """Check that set_model switches the model and updates the recorded names."""
    replacement = "facebook/speecht5-tts"
    original = speecht5_model.model_name

    speecht5_model.set_model(replacement)

    # The new name should be visible on the wrapper, its processor and the
    # underlying model's config.
    assert speecht5_model.model_name == replacement
    assert speecht5_model.processor.model_name == replacement
    assert speecht5_model.model.config.model_name_or_path == replacement

    # Put the original model back once the checks have passed.
    speecht5_model.set_model(original)
|
||
|
|
||
|
|
||
|
def test_speecht5_set_vocoder(speecht5_model):
    """Check that set_vocoder switches the vocoder and updates its name."""
    replacement = "facebook/speecht5-hifigan"
    original = speecht5_model.vocoder_name

    speecht5_model.set_vocoder(replacement)

    assert speecht5_model.vocoder_name == replacement
    assert speecht5_model.vocoder.config.model_name_or_path == replacement

    # Put the original vocoder back once the checks have passed.
    speecht5_model.set_vocoder(original)
|
||
|
|
||
|
|
||
|
def test_speecht5_set_embeddings_dataset(speecht5_model):
    """Check that set_embeddings_dataset swaps the speaker-embedding dataset."""
    replacement = "Matthijs/cmu-arctic-xvectors-test"
    original = speecht5_model.dataset_name

    speecht5_model.set_embeddings_dataset(replacement)

    assert speecht5_model.dataset_name == replacement
    loaded = speecht5_model.embeddings_dataset
    assert isinstance(loaded, torch.utils.data.Dataset)

    # Put the original dataset back once the checks have passed.
    speecht5_model.set_embeddings_dataset(original)
|
||
|
|
||
|
|
||
|
def test_speecht5_get_sampling_rate(speecht5_model):
    """Check that get_sampling_rate reports 16000."""
    assert speecht5_model.get_sampling_rate() == 16000
|
||
|
|
||
|
|
||
|
def test_speecht5_print_model_details(speecht5_model, capsys):
    """Check that print_model_details writes both name labels to stdout."""
    speecht5_model.print_model_details()

    # readouterr() returns an (out, err) pair; only stdout is inspected.
    stdout_text, _ = capsys.readouterr()

    assert "Model Name: " in stdout_text
    assert "Vocoder Name: " in stdout_text
|
||
|
|
||
|
|
||
|
def test_speecht5_quick_synthesize(speecht5_model):
    """Check that quick_synthesize returns a list whose first item is a dict with "audio"."""
    result = speecht5_model.quick_synthesize("Hello, how are you?")
    assert isinstance(result, list)

    first_item = result[0]
    assert isinstance(first_item, dict)
    assert "audio" in first_item
|
||
|
|
||
|
|
||
|
def test_speecht5_change_dataset_split(speecht5_model):
    """Check that change_dataset_split leaves the dataset on the requested split."""
    requested_split = "test"
    speecht5_model.change_dataset_split(requested_split)

    active_split = speecht5_model.embeddings_dataset.split
    assert active_split == requested_split
|
||
|
|
||
|
|
||
|
def test_speecht5_load_custom_embedding(speecht5_model):
    """Check that load_custom_embedding returns the x-vector values as a batched tensor."""
    xvector = [0.1, 0.2, 0.3, 0.4, 0.5]
    embedding = speecht5_model.load_custom_embedding(xvector)

    # Expected result: the same values with a leading batch dimension added.
    expected = torch.tensor(xvector).unsqueeze(0)
    elementwise_match = torch.eq(embedding, expected)
    assert torch.all(elementwise_match)
|
||
|
|
||
|
|
||
|
def test_speecht5_with_different_speakers(speecht5_model):
    """Check that synthesis returns a torch.Tensor for several speaker ids."""
    sentence = "Hello, how are you?"

    for speaker in (7306, 5324, 1234):
        waveform = speecht5_model(sentence, speaker_id=speaker)
        assert isinstance(waveform, torch.Tensor)
|
||
|
|
||
|
|
||
|
def test_speecht5_save_speech_with_different_extensions(speecht5_model, tmp_path):
    """Check that save_speech creates a file for each tested extension.

    Args:
        speecht5_model: SpeechT5 instance supplied by the fixture.
        tmp_path: pytest's built-in per-test temporary directory.
    """
    text = "Hello, how are you?"
    speech = speecht5_model(text)

    for extension in (".wav", ".flac"):
        # Write inside pytest's temporary directory rather than the current
        # working directory: no file is left behind when an assertion fails
        # part-way through the loop, and the names cannot collide with other
        # tests. The path is passed as a plain string, as in the original call.
        filename = str(tmp_path / f"test_speech{extension}")

        speecht5_model.save_speech(speech, filename)
        assert os.path.isfile(filename)
|
||
|
|
||
|
|
||
|
def test_speecht5_invalid_speaker_id(speecht5_model):
    """Check that synthesizing with speaker id 9999 raises IndexError."""
    # The test treats 9999 as a speaker id that is absent from the dataset.
    missing_speaker = 9999
    sentence = "Hello, how are you?"

    with pytest.raises(IndexError):
        speecht5_model(sentence, speaker_id=missing_speaker)
|
||
|
|
||
|
|
||
|
def test_speecht5_invalid_save_path(speecht5_model):
    """Check that saving into a non-existent directory raises FileNotFoundError."""
    waveform = speecht5_model("Hello, how are you?")

    # The parent directory is expected not to exist on the test machine.
    target = "/invalid_directory/test_speech.wav"

    with pytest.raises(FileNotFoundError):
        speecht5_model.save_speech(waveform, target)
|
||
|
|
||
|
|
||
|
def test_speecht5_change_vocoder_model(speecht5_model):
    """Check that synthesis still returns a torch.Tensor after swapping the vocoder."""
    sentence = "Hello, how are you?"
    replacement = "facebook/speecht5-hifigan-ljspeech"
    original = speecht5_model.vocoder_name

    speecht5_model.set_vocoder(replacement)
    waveform = speecht5_model(sentence)
    assert isinstance(waveform, torch.Tensor)

    # Put the original vocoder back once the check has passed.
    speecht5_model.set_vocoder(original)
|