|
|
|
import logging
|
|
|
|
import os
|
|
|
|
from unittest.mock import Mock
|
|
|
|
|
|
|
|
import pytest
|
|
|
|
from dotenv import load_dotenv
|
|
|
|
from requests.exceptions import (
|
|
|
|
ConnectionError,
|
|
|
|
HTTPError,
|
|
|
|
RequestException,
|
|
|
|
Timeout,
|
|
|
|
)
|
|
|
|
|
|
|
|
from swarms.models.gpt4v import GPT4Vision, GPT4VisionResponse
|
|
|
|
|
|
|
|
load_dotenv
|
|
|
|
|
|
|
|
api_key = os.getenv("OPENAI_API_KEY")
|
|
|
|
|
|
|
|
|
|
|
|
# Mock the OpenAI client
|
|
|
|
@pytest.fixture
|
|
|
|
def mock_openai_client():
|
|
|
|
return Mock()
|
|
|
|
|
|
|
|
|
|
|
|
@pytest.fixture
|
|
|
|
def gpt4vision(mock_openai_client):
|
|
|
|
return GPT4Vision(client=mock_openai_client)
|
|
|
|
|
|
|
|
|
|
|
|
def test_gpt4vision_default_values():
|
|
|
|
# Arrange and Act
|
|
|
|
gpt4vision = GPT4Vision()
|
|
|
|
|
|
|
|
# Assert
|
|
|
|
assert gpt4vision.max_retries == 3
|
|
|
|
assert gpt4vision.model == "gpt-4-vision-preview"
|
|
|
|
assert gpt4vision.backoff_factor == 2.0
|
|
|
|
assert gpt4vision.timeout_seconds == 10
|
|
|
|
assert gpt4vision.api_key is None
|
|
|
|
assert gpt4vision.quality == "low"
|
|
|
|
assert gpt4vision.max_tokens == 200
|
|
|
|
|
|
|
|
|
|
|
|
def test_gpt4vision_api_key_from_env_variable():
|
|
|
|
# Arrange
|
|
|
|
api_key = os.environ["OPENAI_API_KEY"]
|
|
|
|
|
|
|
|
# Act
|
|
|
|
gpt4vision = GPT4Vision()
|
|
|
|
|
|
|
|
# Assert
|
|
|
|
assert gpt4vision.api_key == api_key
|
|
|
|
|
|
|
|
|
|
|
|
def test_gpt4vision_set_api_key():
|
|
|
|
# Arrange
|
|
|
|
gpt4vision = GPT4Vision(api_key=api_key)
|
|
|
|
|
|
|
|
# Assert
|
|
|
|
assert gpt4vision.api_key == api_key
|
|
|
|
|
|
|
|
|
|
|
|
def test_gpt4vision_invalid_max_retries():
|
|
|
|
# Arrange and Act
|
|
|
|
with pytest.raises(ValueError):
|
|
|
|
GPT4Vision(max_retries=-1)
|
|
|
|
|
|
|
|
|
|
|
|
def test_gpt4vision_invalid_backoff_factor():
|
|
|
|
# Arrange and Act
|
|
|
|
with pytest.raises(ValueError):
|
|
|
|
GPT4Vision(backoff_factor=-1)
|
|
|
|
|
|
|
|
|
|
|
|
def test_gpt4vision_invalid_timeout_seconds():
|
|
|
|
# Arrange and Act
|
|
|
|
with pytest.raises(ValueError):
|
|
|
|
GPT4Vision(timeout_seconds=-1)
|
|
|
|
|
|
|
|
|
|
|
|
def test_gpt4vision_invalid_max_tokens():
|
|
|
|
# Arrange and Act
|
|
|
|
with pytest.raises(ValueError):
|
|
|
|
GPT4Vision(max_tokens=-1)
|
|
|
|
|
|
|
|
|
|
|
|
def test_gpt4vision_logger_initialized():
|
|
|
|
# Arrange
|
|
|
|
gpt4vision = GPT4Vision()
|
|
|
|
|
|
|
|
# Assert
|
|
|
|
assert isinstance(gpt4vision.logger, logging.Logger)
|
|
|
|
|
|
|
|
|
|
|
|
def test_gpt4vision_process_img_nonexistent_file():
|
|
|
|
# Arrange
|
|
|
|
gpt4vision = GPT4Vision()
|
|
|
|
img_path = "nonexistent_image.jpg"
|
|
|
|
|
|
|
|
# Act and Assert
|
|
|
|
with pytest.raises(FileNotFoundError):
|
|
|
|
gpt4vision.process_img(img_path)
|
|
|
|
|
|
|
|
|
|
|
|
def test_gpt4vision_call_single_task_single_image_no_openai_client(
|
|
|
|
gpt4vision,
|
|
|
|
):
|
|
|
|
# Arrange
|
|
|
|
img_url = "https://images.unsplash.com/photo-1694734479942-8cc7f4660578?q=80&w=1287&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D"
|
|
|
|
task = "Describe this image."
|
|
|
|
|
|
|
|
# Act and Assert
|
|
|
|
with pytest.raises(AttributeError):
|
|
|
|
gpt4vision(img_url, [task])
|
|
|
|
|
|
|
|
|
|
|
|
def test_gpt4vision_call_single_task_single_image_empty_response(
|
|
|
|
gpt4vision, mock_openai_client
|
|
|
|
):
|
|
|
|
# Arrange
|
|
|
|
img_url = "https://images.unsplash.com/photo-1694734479942-8cc7f4660578?q=80&w=1287&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D"
|
|
|
|
task = "Describe this image."
|
|
|
|
|
|
|
|
mock_openai_client.chat.completions.create.return_value.choices = (
|
|
|
|
[]
|
|
|
|
)
|
|
|
|
|
|
|
|
# Act
|
|
|
|
response = gpt4vision(img_url, [task])
|
|
|
|
|
|
|
|
# Assert
|
|
|
|
assert response.answer == ""
|
|
|
|
mock_openai_client.chat.completions.create.assert_called_once()
|
|
|
|
|
|
|
|
|
|
|
|
def test_gpt4vision_call_multiple_tasks_single_image_empty_responses(
|
|
|
|
gpt4vision, mock_openai_client
|
|
|
|
):
|
|
|
|
# Arrange
|
|
|
|
img_url = "https://images.unsplash.com/photo-1694734479942-8cc7f4660578?q=80&w=1287&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D"
|
|
|
|
tasks = ["Describe this image.", "What's in this picture?"]
|
|
|
|
|
|
|
|
mock_openai_client.chat.completions.create.return_value.choices = (
|
|
|
|
[]
|
|
|
|
)
|
|
|
|
|
|
|
|
# Act
|
|
|
|
responses = gpt4vision(img_url, tasks)
|
|
|
|
|
|
|
|
# Assert
|
|
|
|
assert all(response.answer == "" for response in responses)
|
|
|
|
assert (
|
|
|
|
mock_openai_client.chat.completions.create.call_count == 1
|
|
|
|
) # Should be called only once
|
|
|
|
|
|
|
|
|
|
|
|
def test_gpt4vision_call_single_task_single_image_timeout(
|
|
|
|
gpt4vision, mock_openai_client
|
|
|
|
):
|
|
|
|
# Arrange
|
|
|
|
img_url = "https://images.unsplash.com/photo-1694734479942-8cc7f4660578?q=80&w=1287&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D"
|
|
|
|
task = "Describe this image."
|
|
|
|
|
|
|
|
mock_openai_client.chat.completions.create.side_effect = Timeout(
|
|
|
|
"Request timed out"
|
|
|
|
)
|
|
|
|
|
|
|
|
# Act and Assert
|
|
|
|
with pytest.raises(Timeout):
|
|
|
|
gpt4vision(img_url, [task])
|
|
|
|
|
|
|
|
|
|
|
|
def test_gpt4vision_call_retry_with_success_after_timeout(
|
|
|
|
gpt4vision, mock_openai_client
|
|
|
|
):
|
|
|
|
# Arrange
|
|
|
|
img_url = "https://images.unsplash.com/photo-1694734479942-8cc7f4660578?q=80&w=1287&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D"
|
|
|
|
task = "Describe this image."
|
|
|
|
|
|
|
|
# Simulate success after a timeout and retry
|
|
|
|
mock_openai_client.chat.completions.create.side_effect = [
|
|
|
|
Timeout("Request timed out"),
|
|
|
|
{
|
|
|
|
"choices": [
|
|
|
|
{
|
|
|
|
"message": {
|
|
|
|
"content": {
|
|
|
|
"text": "A description of the image."
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
],
|
|
|
|
},
|
|
|
|
]
|
|
|
|
|
|
|
|
# Act
|
|
|
|
response = gpt4vision(img_url, [task])
|
|
|
|
|
|
|
|
# Assert
|
|
|
|
assert response.answer == "A description of the image."
|
|
|
|
assert (
|
|
|
|
mock_openai_client.chat.completions.create.call_count == 2
|
|
|
|
) # Should be called twice
|
|
|
|
|
|
|
|
|
|
|
|
def test_gpt4vision_process_img():
|
|
|
|
# Arrange
|
|
|
|
img_path = "test_image.jpg"
|
|
|
|
gpt4vision = GPT4Vision()
|
|
|
|
|
|
|
|
# Act
|
|
|
|
img_data = gpt4vision.process_img(img_path)
|
|
|
|
|
|
|
|
# Assert
|
|
|
|
assert img_data.startswith("/9j/") # Base64-encoded image data
|
|
|
|
|
|
|
|
|
|
|
|
def test_gpt4vision_call_single_task_single_image(
|
|
|
|
gpt4vision, mock_openai_client
|
|
|
|
):
|
|
|
|
# Arrange
|
|
|
|
img_url = "https://images.unsplash.com/photo-1694734479942-8cc7f4660578?q=80&w=1287&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D"
|
|
|
|
task = "Describe this image."
|
|
|
|
|
|
|
|
expected_response = GPT4VisionResponse(
|
|
|
|
answer="A description of the image."
|
|
|
|
)
|
|
|
|
|
|
|
|
mock_openai_client.chat.completions.create.return_value.choices[
|
|
|
|
0
|
|
|
|
].text = expected_response.answer
|
|
|
|
|
|
|
|
# Act
|
|
|
|
response = gpt4vision(img_url, [task])
|
|
|
|
|
|
|
|
# Assert
|
|
|
|
assert response == expected_response
|
|
|
|
mock_openai_client.chat.completions.create.assert_called_once()
|
|
|
|
|
|
|
|
|
|
|
|
def test_gpt4vision_call_single_task_multiple_images(
|
|
|
|
gpt4vision, mock_openai_client
|
|
|
|
):
|
|
|
|
# Arrange
|
|
|
|
img_urls = [
|
|
|
|
"https://example.com/image1.jpg",
|
|
|
|
"https://example.com/image2.jpg",
|
|
|
|
]
|
|
|
|
task = "Describe these images."
|
|
|
|
|
|
|
|
expected_response = GPT4VisionResponse(
|
|
|
|
answer="Descriptions of the images."
|
|
|
|
)
|
|
|
|
|
|
|
|
mock_openai_client.chat.completions.create.return_value.choices[
|
|
|
|
0
|
|
|
|
].text = expected_response.answer
|
|
|
|
|
|
|
|
# Act
|
|
|
|
response = gpt4vision(img_urls, [task])
|
|
|
|
|
|
|
|
# Assert
|
|
|
|
assert response == expected_response
|
|
|
|
mock_openai_client.chat.completions.create.assert_called_once()
|
|
|
|
|
|
|
|
|
|
|
|
def test_gpt4vision_call_multiple_tasks_single_image(
|
|
|
|
gpt4vision, mock_openai_client
|
|
|
|
):
|
|
|
|
# Arrange
|
|
|
|
img_url = "https://images.unsplash.com/photo-1694734479942-8cc7f4660578?q=80&w=1287&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D"
|
|
|
|
tasks = ["Describe this image.", "What's in this picture?"]
|
|
|
|
|
|
|
|
expected_responses = [
|
|
|
|
GPT4VisionResponse(answer="A description of the image."),
|
|
|
|
GPT4VisionResponse(answer="It contains various objects."),
|
|
|
|
]
|
|
|
|
|
|
|
|
def create_mock_response(response):
|
|
|
|
return {
|
|
|
|
"choices": [
|
|
|
|
{"message": {"content": {"text": response.answer}}}
|
|
|
|
]
|
|
|
|
}
|
|
|
|
|
|
|
|
mock_openai_client.chat.completions.create.side_effect = [
|
|
|
|
create_mock_response(response)
|
|
|
|
for response in expected_responses
|
|
|
|
]
|
|
|
|
|
|
|
|
# Act
|
|
|
|
responses = gpt4vision(img_url, tasks)
|
|
|
|
|
|
|
|
# Assert
|
|
|
|
assert responses == expected_responses
|
|
|
|
assert (
|
|
|
|
mock_openai_client.chat.completions.create.call_count == 1
|
|
|
|
) # Should be called only once
|
|
|
|
|
|
|
|
def test_gpt4vision_call_multiple_tasks_single_image(
|
|
|
|
gpt4vision, mock_openai_client
|
|
|
|
):
|
|
|
|
# Arrange
|
|
|
|
img_url = "https://images.unsplash.com/photo-1694734479942-8cc7f4660578?q=80&w=1287&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D"
|
|
|
|
tasks = ["Describe this image.", "What's in this picture?"]
|
|
|
|
|
|
|
|
expected_responses = [
|
|
|
|
GPT4VisionResponse(answer="A description of the image."),
|
|
|
|
GPT4VisionResponse(answer="It contains various objects."),
|
|
|
|
]
|
|
|
|
|
|
|
|
mock_openai_client.chat.completions.create.side_effect = [
|
|
|
|
{
|
|
|
|
"choices": [
|
|
|
|
{
|
|
|
|
"message": {
|
|
|
|
"content": {
|
|
|
|
"text": expected_responses[i].answer
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
]
|
|
|
|
}
|
|
|
|
for i in range(len(expected_responses))
|
|
|
|
]
|
|
|
|
|
|
|
|
# Act
|
|
|
|
responses = gpt4vision(img_url, tasks)
|
|
|
|
|
|
|
|
# Assert
|
|
|
|
assert responses == expected_responses
|
|
|
|
assert (
|
|
|
|
mock_openai_client.chat.completions.create.call_count == 1
|
|
|
|
) # Should be called only once
|
|
|
|
|
|
|
|
|
|
|
|
def test_gpt4vision_call_multiple_tasks_multiple_images(
|
|
|
|
gpt4vision, mock_openai_client
|
|
|
|
):
|
|
|
|
# Arrange
|
|
|
|
img_urls = [
|
|
|
|
"https://images.unsplash.com/photo-1694734479857-626882b6db37?q=80&w=1287&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D",
|
|
|
|
"https://images.unsplash.com/photo-1694734479898-6ac4633158ac?q=80&w=1287&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D",
|
|
|
|
]
|
|
|
|
tasks = ["Describe these images.", "What's in these pictures?"]
|
|
|
|
|
|
|
|
expected_responses = [
|
|
|
|
GPT4VisionResponse(answer="Descriptions of the images."),
|
|
|
|
GPT4VisionResponse(answer="They contain various objects."),
|
|
|
|
]
|
|
|
|
|
|
|
|
mock_openai_client.chat.completions.create.side_effect = [
|
|
|
|
{
|
|
|
|
"choices": [
|
|
|
|
{"message": {"content": {"text": response.answer}}}
|
|
|
|
]
|
|
|
|
}
|
|
|
|
for response in expected_responses
|
|
|
|
]
|
|
|
|
|
|
|
|
# Act
|
|
|
|
responses = gpt4vision(img_urls, tasks)
|
|
|
|
|
|
|
|
# Assert
|
|
|
|
assert responses == expected_responses
|
|
|
|
assert (
|
|
|
|
mock_openai_client.chat.completions.create.call_count == 1
|
|
|
|
) # Should be called only once
|
|
|
|
|
|
|
|
|
|
|
|
def test_gpt4vision_call_http_error(gpt4vision, mock_openai_client):
|
|
|
|
# Arrange
|
|
|
|
img_url = "https://images.unsplash.com/photo-1694734479942-8cc7f4660578?q=80&w=1287&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D"
|
|
|
|
task = "Describe this image."
|
|
|
|
|
|
|
|
mock_openai_client.chat.completions.create.side_effect = (
|
|
|
|
HTTPError("HTTP Error")
|
|
|
|
)
|
|
|
|
|
|
|
|
# Act and Assert
|
|
|
|
with pytest.raises(HTTPError):
|
|
|
|
gpt4vision(img_url, [task])
|
|
|
|
|
|
|
|
|
|
|
|
def test_gpt4vision_call_request_error(
|
|
|
|
gpt4vision, mock_openai_client
|
|
|
|
):
|
|
|
|
# Arrange
|
|
|
|
img_url = "https://images.unsplash.com/photo-1694734479942-8cc7f4660578?q=80&w=1287&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D"
|
|
|
|
task = "Describe this image."
|
|
|
|
|
|
|
|
mock_openai_client.chat.completions.create.side_effect = (
|
|
|
|
RequestException("Request Error")
|
|
|
|
)
|
|
|
|
|
|
|
|
# Act and Assert
|
|
|
|
with pytest.raises(RequestException):
|
|
|
|
gpt4vision(img_url, [task])
|
|
|
|
|
|
|
|
|
|
|
|
def test_gpt4vision_call_connection_error(
|
|
|
|
gpt4vision, mock_openai_client
|
|
|
|
):
|
|
|
|
# Arrange
|
|
|
|
img_url = "https://images.unsplash.com/photo-1694734479942-8cc7f4660578?q=80&w=1287&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D"
|
|
|
|
task = "Describe this image."
|
|
|
|
|
|
|
|
mock_openai_client.chat.completions.create.side_effect = (
|
|
|
|
ConnectionError("Connection Error")
|
|
|
|
)
|
|
|
|
|
|
|
|
# Act and Assert
|
|
|
|
with pytest.raises(ConnectionError):
|
|
|
|
gpt4vision(img_url, [task])
|
|
|
|
|
|
|
|
|
|
|
|
def test_gpt4vision_call_retry_with_success(
|
|
|
|
gpt4vision, mock_openai_client
|
|
|
|
):
|
|
|
|
# Arrange
|
|
|
|
img_url = "https://images.unsplash.com/photo-1694734479942-8cc7f4660578?q=80&w=1287&auto=format&fit=crop&ixlib=rb-4.0.3&ixid=M3wxMjA3fDB8MHxwaG90by1wYWdlfHx8fGVufDB8fHx8fA%3D%3D"
|
|
|
|
task = "Describe this image."
|
|
|
|
|
|
|
|
# Simulate success after a retry
|
|
|
|
mock_openai_client.chat.completions.create.side_effect = [
|
|
|
|
RequestException("Temporary error"),
|
|
|
|
{
|
|
|
|
"choices": [{"text": "A description of the image."}]
|
|
|
|
}, # fixed dictionary syntax
|
|
|
|
]
|
|
|
|
|
|
|
|
# Act
|
|
|
|
response = gpt4vision(img_url, [task])
|
|
|
|
|
|
|
|
# Assert
|
|
|
|
assert response.answer == "A description of the image."
|
|
|
|
assert (
|
|
|
|
mock_openai_client.chat.completions.create.call_count == 2
|
|
|
|
) # Should be called twice
|