# You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.
# swarms/tests/models/test_gpt4v.py
#
# 415 lines
# 12 KiB

import logging
import os
from unittest.mock import Mock
import pytest
from dotenv import load_dotenv
from requests.exceptions import (
ConnectionError,
HTTPError,
RequestException,
Timeout,
)
from swarms.models.gpt4v import GPT4Vision, GPT4VisionResponse
# Populate os.environ from a local .env file (if one exists) before the key is
# read below. The function has to be *called*; a bare `load_dotenv` reference
# is an expression statement that does nothing.
load_dotenv()

# Module-level key shared by the tests below; None when OPENAI_API_KEY is not
# defined in the environment.
api_key = os.getenv("OPENAI_API_KEY")
@pytest.fixture
def mock_openai_client():
    """Provide a fresh ``Mock`` object standing in for the OpenAI client."""
    client = Mock()
    return client
@pytest.fixture
def gpt4vision(mock_openai_client):
    """Provide a ``GPT4Vision`` instance constructed with the mocked client."""
    model = GPT4Vision(client=mock_openai_client)
    return model
def test_gpt4vision_default_values(monkeypatch):
    """A ``GPT4Vision`` built without arguments exposes the expected defaults.

    ``OPENAI_API_KEY`` is removed from the environment for the duration of the
    test so that the ``api_key is None`` assertion does not depend on the
    shell or ``.env`` file of whoever runs the suite.
    """
    # Arrange
    monkeypatch.delenv("OPENAI_API_KEY", raising=False)
    # Act
    model = GPT4Vision()
    # Assert
    assert model.max_retries == 3
    assert model.model == "gpt-4-vision-preview"
    assert model.backoff_factor == 2.0
    assert model.timeout_seconds == 10
    assert model.api_key is None
    assert model.quality == "low"
    assert model.max_tokens == 200
def test_gpt4vision_api_key_from_env_variable():
    """``GPT4Vision()`` is expected to expose the key found in ``OPENAI_API_KEY``.

    The test is skipped (instead of erroring with ``KeyError``) when the
    variable is not defined, since there is then nothing to compare against.
    """
    # Arrange
    env_key = os.environ.get("OPENAI_API_KEY")
    if env_key is None:
        pytest.skip("OPENAI_API_KEY is not set in the environment")
    # Act
    model = GPT4Vision()
    # Assert
    assert model.api_key == env_key
def test_gpt4vision_set_api_key():
    """An ``api_key`` passed to the constructor is stored on the instance.

    A literal placeholder key is used rather than the module-level value read
    from the environment: that value is ``None`` when ``OPENAI_API_KEY`` is
    unset, which would reduce the assertion to ``None == None``.
    """
    # Arrange
    explicit_key = "sk-test-api-key"
    # Act
    model = GPT4Vision(api_key=explicit_key)
    # Assert
    assert model.api_key == explicit_key
def test_gpt4vision_invalid_max_retries():
    """Constructing with ``max_retries=-1`` must raise ``ValueError``."""
    bad_kwargs = {"max_retries": -1}
    with pytest.raises(ValueError):
        GPT4Vision(**bad_kwargs)
def test_gpt4vision_invalid_backoff_factor():
    """Constructing with ``backoff_factor=-1`` must raise ``ValueError``."""
    bad_kwargs = {"backoff_factor": -1}
    with pytest.raises(ValueError):
        GPT4Vision(**bad_kwargs)
def test_gpt4vision_invalid_timeout_seconds():
    """Constructing with ``timeout_seconds=-1`` must raise ``ValueError``."""
    bad_kwargs = {"timeout_seconds": -1}
    with pytest.raises(ValueError):
        GPT4Vision(**bad_kwargs)
def test_gpt4vision_invalid_max_tokens():
    """Constructing with ``max_tokens=-1`` must raise ``ValueError``."""
    bad_kwargs = {"max_tokens": -1}
    with pytest.raises(ValueError):
        GPT4Vision(**bad_kwargs)
def test_gpt4vision_logger_initialized():
    """The ``logger`` attribute of a new instance is a ``logging.Logger``."""
    # Arrange
    model = GPT4Vision()
    # Act
    logger = model.logger
    # Assert
    assert isinstance(logger, logging.Logger)
def test_gpt4vision_process_img_nonexistent_file():
    """``process_img`` on a path that does not exist raises ``FileNotFoundError``."""
    # Arrange
    missing_path = "nonexistent_image.jpg"
    model = GPT4Vision()
    # Act and Assert
    with pytest.raises(FileNotFoundError):
        model.process_img(missing_path)
def test_gpt4vision_call_single_task_single_image_no_openai_client(gpt4vision):
    """Calling the model with one image URL and one task raises ``AttributeError``.

    NOTE(review): despite the "no_openai_client" name, the ``gpt4vision``
    fixture is built with a mock client -- confirm this is what is intended.
    """
    # Arrange
    image_url = "https://images.unsplash.com/photo-1694734479942-8cc7f4660578?q=80&w=1287&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D"
    tasks = ["Describe this image."]
    # Act and Assert
    with pytest.raises(AttributeError):
        gpt4vision(image_url, tasks)
def test_gpt4vision_call_single_task_single_image_empty_response(
    gpt4vision, mock_openai_client
):
    """An empty ``choices`` list from the client yields an empty answer.

    Also checks that ``chat.completions.create`` was invoked exactly once.
    """
    # Arrange
    image_url = "https://images.unsplash.com/photo-1694734479942-8cc7f4660578?q=80&w=1287&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D"
    tasks = ["Describe this image."]
    create_mock = mock_openai_client.chat.completions.create
    create_mock.return_value.choices = []
    # Act
    result = gpt4vision(image_url, tasks)
    # Assert
    assert result.answer == ""
    create_mock.assert_called_once()
def test_gpt4vision_call_multiple_tasks_single_image_empty_responses(
    gpt4vision, mock_openai_client
):
    """With an empty ``choices`` list, every returned response has an empty answer.

    Also checks that ``chat.completions.create`` was called a single time for
    the two tasks.
    """
    # Arrange
    image_url = "https://images.unsplash.com/photo-1694734479942-8cc7f4660578?q=80&w=1287&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D"
    task_list = ["Describe this image.", "What's in this picture?"]
    create_mock = mock_openai_client.chat.completions.create
    create_mock.return_value.choices = []
    # Act
    results = gpt4vision(image_url, task_list)
    # Assert
    assert not any(item.answer != "" for item in results)
    # Should be called only once
    assert create_mock.call_count == 1
def test_gpt4vision_call_single_task_single_image_timeout(
    gpt4vision, mock_openai_client
):
    """A ``Timeout`` raised by the mocked client surfaces from the model call."""
    # Arrange
    image_url = "https://images.unsplash.com/photo-1694734479942-8cc7f4660578?q=80&w=1287&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D"
    tasks = ["Describe this image."]
    timeout_error = Timeout("Request timed out")
    mock_openai_client.chat.completions.create.side_effect = timeout_error
    # Act and Assert
    with pytest.raises(Timeout):
        gpt4vision(image_url, tasks)
def test_gpt4vision_call_retry_with_success_after_timeout(
    gpt4vision, mock_openai_client
):
    """A first-call ``Timeout`` followed by a payload yields the payload's answer.

    The mocked ``create`` raises on its first call and returns a dict on the
    second; the test expects exactly two calls in total.
    """
    # Arrange
    image_url = "https://images.unsplash.com/photo-1694734479942-8cc7f4660578?q=80&w=1287&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D"
    tasks = ["Describe this image."]
    first_failure = Timeout("Request timed out")
    second_success = {
        "choices": [
            {"message": {"content": {"text": "A description of the image."}}}
        ],
    }
    create_mock = mock_openai_client.chat.completions.create
    # Simulate success after a timeout and retry
    create_mock.side_effect = [first_failure, second_success]
    # Act
    result = gpt4vision(image_url, tasks)
    # Assert
    assert result.answer == "A description of the image."
    # Should be called twice
    assert create_mock.call_count == 2
def test_gpt4vision_process_img():
    """``process_img`` output for ``test_image.jpg`` starts with ``/9j/``.

    NOTE(review): relies on a ``test_image.jpg`` file resolvable from the
    current working directory; nothing in this test creates it -- confirm the
    fixture file is present wherever the suite runs.
    """
    # Arrange
    model = GPT4Vision()
    image_path = "test_image.jpg"
    # Act
    encoded = model.process_img(image_path)
    # Assert
    assert encoded.startswith("/9j/")  # Base64-encoded image data
def test_gpt4vision_call_single_task_single_image(
    gpt4vision, mock_openai_client
):
    """One image URL plus one task returns the response built from the client.

    The mocked completion exposes a single choice whose ``text`` is the
    expected answer; the test also checks ``create`` was called exactly once.
    """
    # Arrange
    img_url = "https://images.unsplash.com/photo-1694734479942-8cc7f4660578?q=80&w=1287&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D"
    task = "Describe this image."
    expected_response = GPT4VisionResponse(answer="A description of the image.")
    # A plain Mock does not implement __getitem__, so indexing the
    # auto-created `choices` attribute raises TypeError. Assign a real list
    # holding one choice instead.
    completion = mock_openai_client.chat.completions.create.return_value
    completion.choices = [Mock(text=expected_response.answer)]
    # Act
    response = gpt4vision(img_url, [task])
    # Assert
    assert response == expected_response
    mock_openai_client.chat.completions.create.assert_called_once()
def test_gpt4vision_call_single_task_multiple_images(
    gpt4vision, mock_openai_client
):
    """Two image URLs plus one task return the response built from the client.

    The mocked completion exposes a single choice whose ``text`` is the
    expected answer; the test also checks ``create`` was called exactly once.
    """
    # Arrange
    img_urls = [
        "https://example.com/image1.jpg",
        "https://example.com/image2.jpg",
    ]
    task = "Describe these images."
    expected_response = GPT4VisionResponse(answer="Descriptions of the images.")
    # A plain Mock does not implement __getitem__, so indexing the
    # auto-created `choices` attribute raises TypeError. Assign a real list
    # holding one choice instead.
    completion = mock_openai_client.chat.completions.create.return_value
    completion.choices = [Mock(text=expected_response.answer)]
    # Act
    response = gpt4vision(img_urls, [task])
    # Assert
    assert response == expected_response
    mock_openai_client.chat.completions.create.assert_called_once()
def test_gpt4vision_call_multiple_tasks_single_image_helper_payloads(
    gpt4vision, mock_openai_client
):
    """Two tasks on one image return the two expected responses.

    The payloads handed to the mocked ``create`` are built by a local helper.
    This test carries its own unique name: a second function in this module
    is called ``test_gpt4vision_call_multiple_tasks_single_image``, and
    sharing that name would rebind it so that only the later definition is
    collected by pytest.
    """
    # Arrange
    img_url = "https://images.unsplash.com/photo-1694734479942-8cc7f4660578?q=80&w=1287&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D"
    tasks = ["Describe this image.", "What's in this picture?"]
    expected_responses = [
        GPT4VisionResponse(answer="A description of the image."),
        GPT4VisionResponse(answer="It contains various objects."),
    ]

    def create_mock_response(response):
        """Wrap ``response.answer`` in the nested dict payload used by the mock."""
        return {
            "choices": [{"message": {"content": {"text": response.answer}}}]
        }

    mock_openai_client.chat.completions.create.side_effect = [
        create_mock_response(response) for response in expected_responses
    ]
    # Act
    responses = gpt4vision(img_url, tasks)
    # Assert
    assert responses == expected_responses
    assert (
        mock_openai_client.chat.completions.create.call_count == 1
    )  # Should be called only once
def test_gpt4vision_call_multiple_tasks_single_image(
    gpt4vision, mock_openai_client
):
    """Two tasks on one image return the two expected responses.

    The mocked ``create`` is given one dict payload per expected response, and
    the test expects it to have been called a single time.
    """
    # Arrange
    image_url = "https://images.unsplash.com/photo-1694734479942-8cc7f4660578?q=80&w=1287&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D"
    task_list = ["Describe this image.", "What's in this picture?"]
    expected = [
        GPT4VisionResponse(answer="A description of the image."),
        GPT4VisionResponse(answer="It contains various objects."),
    ]
    payloads = []
    for item in expected:
        payloads.append(
            {"choices": [{"message": {"content": {"text": item.answer}}}]}
        )
    create_mock = mock_openai_client.chat.completions.create
    create_mock.side_effect = payloads
    # Act
    results = gpt4vision(image_url, task_list)
    # Assert
    assert results == expected
    # Should be called only once
    assert create_mock.call_count == 1
def test_gpt4vision_call_multiple_tasks_multiple_images(
    gpt4vision, mock_openai_client
):
    """Two tasks on two images return the two expected responses.

    The mocked ``create`` is given one dict payload per expected response, and
    the test expects it to have been called a single time.
    """
    # Arrange
    image_urls = [
        "https://images.unsplash.com/photo-1694734479857-626882b6db37?q=80&w=1287&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D",
        "https://images.unsplash.com/photo-1694734479898-6ac4633158ac?q=80&w=1287&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D",
    ]
    task_list = ["Describe these images.", "What's in these pictures?"]
    expected = [
        GPT4VisionResponse(answer="Descriptions of the images."),
        GPT4VisionResponse(answer="They contain various objects."),
    ]
    payloads = []
    for item in expected:
        payloads.append(
            {"choices": [{"message": {"content": {"text": item.answer}}}]}
        )
    create_mock = mock_openai_client.chat.completions.create
    create_mock.side_effect = payloads
    # Act
    results = gpt4vision(image_urls, task_list)
    # Assert
    assert results == expected
    # Should be called only once
    assert create_mock.call_count == 1
def test_gpt4vision_call_http_error(gpt4vision, mock_openai_client):
    """An ``HTTPError`` raised by the mocked client surfaces from the model call."""
    # Arrange
    image_url = "https://images.unsplash.com/photo-1694734479942-8cc7f4660578?q=80&w=1287&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D"
    tasks = ["Describe this image."]
    http_error = HTTPError("HTTP Error")
    mock_openai_client.chat.completions.create.side_effect = http_error
    # Act and Assert
    with pytest.raises(HTTPError):
        gpt4vision(image_url, tasks)
def test_gpt4vision_call_request_error(gpt4vision, mock_openai_client):
    """A ``RequestException`` raised by the mocked client surfaces from the call."""
    # Arrange
    image_url = "https://images.unsplash.com/photo-1694734479942-8cc7f4660578?q=80&w=1287&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D"
    tasks = ["Describe this image."]
    request_error = RequestException("Request Error")
    mock_openai_client.chat.completions.create.side_effect = request_error
    # Act and Assert
    with pytest.raises(RequestException):
        gpt4vision(image_url, tasks)
def test_gpt4vision_call_connection_error(gpt4vision, mock_openai_client):
    """A ``ConnectionError`` raised by the mocked client surfaces from the call."""
    # Arrange
    image_url = "https://images.unsplash.com/photo-1694734479942-8cc7f4660578?q=80&w=1287&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D"
    tasks = ["Describe this image."]
    connection_error = ConnectionError("Connection Error")
    mock_openai_client.chat.completions.create.side_effect = connection_error
    # Act and Assert
    with pytest.raises(ConnectionError):
        gpt4vision(image_url, tasks)
def test_gpt4vision_call_retry_with_success(gpt4vision, mock_openai_client):
    """A first-call ``RequestException`` followed by a payload yields its answer.

    The mocked ``create`` raises on its first call and returns a dict on the
    second; the test expects exactly two calls in total.
    """
    # Arrange
    image_url = "https://images.unsplash.com/photo-1694734479942-8cc7f4660578?q=80&w=1287&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D"
    tasks = ["Describe this image."]
    first_failure = RequestException("Temporary error")
    second_success = {"choices": [{"text": "A description of the image."}]}
    create_mock = mock_openai_client.chat.completions.create
    # Simulate success after a retry
    create_mock.side_effect = [first_failure, second_success]
    # Act
    result = gpt4vision(image_url, tasks)
    # Assert
    assert result.answer == "A description of the image."
    # Should be called twice
    assert create_mock.call_count == 2