From 3d89664193f61bbe31504c85418a749851a68ed6 Mon Sep 17 00:00:00 2001
From: harshalmore31
Date: Tue, 8 Jul 2025 22:58:16 +0530
Subject: [PATCH] Add vision support tests and enhance URL processing validation

---
 tests/utils/test_litellm_wrapper.py | 113 ++++++++++++++++++++++++++++
 1 file changed, 113 insertions(+)

diff --git a/tests/utils/test_litellm_wrapper.py b/tests/utils/test_litellm_wrapper.py
index 02e79c9f..3a657bae 100644
--- a/tests/utils/test_litellm_wrapper.py
+++ b/tests/utils/test_litellm_wrapper.py
@@ -201,6 +201,119 @@ def run_test_suite():
     except Exception as e:
         log_test_result("Batched Run", False, str(e))
 
+    # Test 8: Vision Support Check
+    try:
+        logger.info("Testing vision support check")
+        llm = LiteLLM(model_name="gpt-4o")
+        # This should not raise an error for vision-capable models
+        llm.check_if_model_supports_vision(img="test.jpg")
+        log_test_result("Vision Support Check", True)
+    except Exception as e:
+        log_test_result("Vision Support Check", False, str(e))
+
+    # Test 9: Direct URL Processing
+    try:
+        logger.info("Testing direct URL processing")
+        llm = LiteLLM(model_name="gpt-4o")
+        test_url = "https://github.com/kyegomez/swarms/blob/master/swarms_logo_new.png?raw=true"
+        should_use_direct = llm._should_use_direct_url(test_url)
+        assert isinstance(should_use_direct, bool)
+        log_test_result("Direct URL Processing", True)
+    except Exception as e:
+        log_test_result("Direct URL Processing", False, str(e))
+
+    # Test 10: Message Preparation with Image
+    try:
+        logger.info("Testing message preparation with image")
+        llm = LiteLLM(model_name="gpt-4o")
+        # Remote image URL used to exercise the message structure
+        test_img = "https://github.com/kyegomez/swarms/blob/master/swarms_logo_new.png?raw=true"
+        messages = llm._prepare_messages("Describe this image", img=test_img)
+        assert isinstance(messages, list)
+        assert len(messages) >= 1
+        # A user-role message must exist; its content layout is not inspected here
+        user_message = next((msg for msg in messages if msg["role"] == "user"), None)
+        assert user_message is not None
+        log_test_result("Message Preparation with Image", True)
+    except Exception as e:
+        log_test_result("Message Preparation with Image", False, str(e))
+
+    # Test 11: Vision Processing Methods
+    try:
+        logger.info("Testing vision processing methods")
+        llm = LiteLLM(model_name="gpt-4o")
+        messages = []
+
+        # Test OpenAI vision processing
+        processed_messages = llm.openai_vision_processing(
+            "Describe this image",
+            "https://github.com/kyegomez/swarms/blob/master/swarms_logo_new.png?raw=true",
+            messages.copy()
+        )
+        assert isinstance(processed_messages, list)
+        assert len(processed_messages) > 0
+
+        # Test Anthropic vision processing
+        llm_anthropic = LiteLLM(model_name="claude-3-5-sonnet-20241022")
+        processed_messages_anthropic = llm_anthropic.anthropic_vision_processing(
+            "Describe this image",
+            "https://github.com/kyegomez/swarms/blob/master/swarms_logo_new.png?raw=true",
+            messages.copy()
+        )
+        assert isinstance(processed_messages_anthropic, list)
+        assert len(processed_messages_anthropic) > 0
+
+        log_test_result("Vision Processing Methods", True)
+    except Exception as e:
+        log_test_result("Vision Processing Methods", False, str(e))
+
+    # Test 12: Local vs URL Detection
+    try:
+        logger.info("Testing local vs URL detection")
+        llm = LiteLLM(model_name="gpt-4o")
+
+        # Test URL detection
+        url_test = "https://github.com/kyegomez/swarms/blob/master/swarms_logo_new.png?raw=true"
+        is_url_direct = llm._should_use_direct_url(url_test)
+
+        # Test local file detection
+        local_test = "/path/to/local/image.jpg"
+        is_local_direct = llm._should_use_direct_url(local_test)
+
+        # URLs should potentially use direct, local files should not
+        assert isinstance(is_url_direct, bool)
+        assert isinstance(is_local_direct, bool)
+        assert is_local_direct is False  # Local files should never use direct URL
+
+        log_test_result("Local vs URL Detection", True)
+    except Exception as e:
+        log_test_result("Local vs URL Detection", False, str(e))
+
+    # Test 13: Vision Message Structure
+    try:
+        logger.info("Testing vision message structure")
+        llm = LiteLLM(model_name="gpt-4o")
+        messages = []
+
+        # Test message structure for image input
+        result = llm.vision_processing(
+            task="What do you see?",
+            image="https://github.com/kyegomez/swarms/blob/master/swarms_logo_new.png?raw=true",
+            messages=messages
+        )
+
+        assert isinstance(result, list)
+        assert len(result) > 0
+
+        # Verify the last message is a user message that carries a content field
+        user_msg = result[-1]  # Last message should be user message
+        assert user_msg["role"] == "user"
+        assert "content" in user_msg
+
+        log_test_result("Vision Message Structure", True)
+    except Exception as e:
+        log_test_result("Vision Message Structure", False, str(e))
+
     # Generate test report
     success_rate = (passed_tests / total_tests) * 100
     logger.info("\n=== Test Suite Report ===")