From 3d89664193f61bbe31504c85418a749851a68ed6 Mon Sep 17 00:00:00 2001
From: harshalmore31 <harshalmore2468@gmail.com>
Date: Tue, 8 Jul 2025 22:58:16 +0530
Subject: [PATCH] Add vision support tests and enhance URL processing
 validation

---
 tests/utils/test_litellm_wrapper.py | 113 ++++++++++++++++++++++++++++
 1 file changed, 113 insertions(+)

diff --git a/tests/utils/test_litellm_wrapper.py b/tests/utils/test_litellm_wrapper.py
index 02e79c9f..3a657bae 100644
--- a/tests/utils/test_litellm_wrapper.py
+++ b/tests/utils/test_litellm_wrapper.py
@@ -201,6 +201,119 @@ def run_test_suite():
     except Exception as e:
         log_test_result("Batched Run", False, str(e))
 
+    # Test 8: Vision Support Check
+    try:
+        logger.info("Testing vision support check")
+        llm = LiteLLM(model_name="gpt-4o")
+        # Should not raise for a vision-capable model; passes on a clean return (result is not inspected)
+        llm.check_if_model_supports_vision(img="test.jpg")
+        log_test_result("Vision Support Check", True)
+    except Exception as e:
+        log_test_result("Vision Support Check", False, str(e))
+
+    # Test 9: Direct URL Processing (asserts only that the helper returns a bool, not which value)
+    try:
+        logger.info("Testing direct URL processing")
+        llm = LiteLLM(model_name="gpt-4o")
+        test_url = "https://github.com/kyegomez/swarms/blob/master/swarms_logo_new.png?raw=true"
+        should_use_direct = llm._should_use_direct_url(test_url)
+        assert isinstance(should_use_direct, bool)
+        log_test_result("Direct URL Processing", True)
+    except Exception as e:
+        log_test_result("Direct URL Processing", False, str(e))
+
+    # Test 10: Message Preparation with Image
+    try:
+        logger.info("Testing message preparation with image")
+        llm = LiteLLM(model_name="gpt-4o")
+        # Real remote image URL (not a mock); NOTE(review): confirm whether _prepare_messages fetches it
+        test_img = "https://github.com/kyegomez/swarms/blob/master/swarms_logo_new.png?raw=true"
+        messages = llm._prepare_messages("Describe this image", img=test_img)
+        assert isinstance(messages, list)
+        assert len(messages) >= 1
+        # Only checks that a user-role message exists; its content structure is not inspected
+        user_message = next((msg for msg in messages if msg["role"] == "user"), None)
+        assert user_message is not None
+        log_test_result("Message Preparation with Image", True)
+    except Exception as e:
+        log_test_result("Message Preparation with Image", False, str(e))
+
+    # Test 11: Vision Processing Methods
+    try:
+        logger.info("Testing vision processing methods")
+        llm = LiteLLM(model_name="gpt-4o")
+        messages = []
+        
+        # OpenAI path: a copy of the empty history is passed in; only a non-empty list result is asserted
+        processed_messages = llm.openai_vision_processing(
+            "Describe this image", 
+            "https://github.com/kyegomez/swarms/blob/master/swarms_logo_new.png?raw=true", 
+            messages.copy()
+        )
+        assert isinstance(processed_messages, list)
+        assert len(processed_messages) > 0
+        
+        # Anthropic path on a separate Claude-model instance: same non-empty-list check
+        llm_anthropic = LiteLLM(model_name="claude-3-5-sonnet-20241022")
+        processed_messages_anthropic = llm_anthropic.anthropic_vision_processing(
+            "Describe this image", 
+            "https://github.com/kyegomez/swarms/blob/master/swarms_logo_new.png?raw=true", 
+            messages.copy()
+        )
+        assert isinstance(processed_messages_anthropic, list)
+        assert len(processed_messages_anthropic) > 0
+        
+        log_test_result("Vision Processing Methods", True)
+    except Exception as e:
+        log_test_result("Vision Processing Methods", False, str(e))
+
+    # Test 12: Local vs URL Detection
+    try:
+        logger.info("Testing local vs URL detection")
+        llm = LiteLLM(model_name="gpt-4o")
+
+        # Remote URL: the decision may go either way, so only its type is checked below
+        url_test = "https://github.com/kyegomez/swarms/blob/master/swarms_logo_new.png?raw=true"
+        is_url_direct = llm._should_use_direct_url(url_test)
+
+        # Local filesystem path: expected to be rejected for direct-URL use
+        local_test = "/path/to/local/image.jpg"
+        is_local_direct = llm._should_use_direct_url(local_test)
+
+        # Type checks come first so the identity comparison with False is meaningful
+        assert isinstance(is_url_direct, bool)
+        assert isinstance(is_local_direct, bool)
+        assert is_local_direct is False  # Local files should never use direct URL
+
+        log_test_result("Local vs URL Detection", True)
+    except Exception as e:
+        log_test_result("Local vs URL Detection", False, str(e))
+
+    # Test 13: Vision Message Structure
+    try:
+        logger.info("Testing vision message structure")
+        llm = LiteLLM(model_name="gpt-4o")
+        messages = []
+        
+        # Run vision_processing on an empty history with a remote image URL
+        result = llm.vision_processing(
+            task="What do you see?",
+            image="https://github.com/kyegomez/swarms/blob/master/swarms_logo_new.png?raw=true",
+            messages=messages
+        )
+        
+        assert isinstance(result, list)
+        assert len(result) > 0
+        
+        # Only the role and the presence of a "content" key are checked; text/image parts are not inspected
+        user_msg = result[-1]  # Last message should be user message
+        assert user_msg["role"] == "user"
+        assert "content" in user_msg
+        
+        log_test_result("Vision Message Structure", True)
+    except Exception as e:
+        log_test_result("Vision Message Structure", False, str(e))
+
     # Generate test report
     success_rate = (passed_tests / total_tests) * 100
     logger.info("\n=== Test Suite Report ===")