From 7ec57fe39e7c10bfaf9ef6aabbf938b6a64b3a79 Mon Sep 17 00:00:00 2001 From: harshalmore31 Date: Tue, 8 Jul 2025 21:31:52 +0530 Subject: [PATCH] Refactor vision processing methods to streamline direct URL handling and remove deprecated functionality --- swarms/utils/litellm_wrapper.py | 92 +++++++++------------------------ 1 file changed, 24 insertions(+), 68 deletions(-) diff --git a/swarms/utils/litellm_wrapper.py b/swarms/utils/litellm_wrapper.py index ed06fc1e..d9832edb 100644 --- a/swarms/utils/litellm_wrapper.py +++ b/swarms/utils/litellm_wrapper.py @@ -328,47 +328,22 @@ class LiteLLM: # Only use direct URL for HTTP/HTTPS URLs if not image.startswith(("http://", "https://")): return False - - # Check if the model supports direct URL passing - # Most major providers (OpenAI, Anthropic, etc.) support direct URLs - model_lower = self.model_name.lower() - - # List of models/providers that support direct URL passing - url_supported_models = [ - "gpt-4", - "gpt-4o", - "gpt-4-vision", - "claude", - "anthropic", - "openai", - "gemini", - "vertex_ai", - ] - - # Check if any of the supported model patterns match - return any(pattern in model_lower for pattern in url_supported_models) - - def _is_local_model(self) -> bool: - """ - Check if the model is a local/custom model that might not support direct URLs. - Returns: - bool: True if it's likely a local model - """ + # Check for local/custom models that might not support direct URLs model_lower = self.model_name.lower() + local_indicators = ["localhost", "127.0.0.1", "local", "custom", "ollama", "llama-cpp"] - # Indicators of local/custom models - local_indicators = [ - "localhost", - "127.0.0.1", - "local", - "custom", - "ollama", - "llama-cpp", - ] + is_local = any(indicator in model_lower for indicator in local_indicators) or \ + (self.base_url is not None and any(indicator in self.base_url.lower() for indicator in local_indicators)) - return any(indicator in model_lower for indicator in local_indicators) or \ - (self.base_url is not None and any(indicator in self.base_url.lower() for indicator in local_indicators)) + if is_local: + return False + + # Use LiteLLM's supports_vision to check if model supports vision and direct URLs + try: + return supports_vision(model=self.model_name) + except Exception: + return False def vision_processing( self, task: str, image: str, messages: Optional[list] = None @@ -571,12 +546,21 @@ class LiteLLM: """ return self.run(task, *args, **kwargs) - async def arun(self, task: str, *args, **kwargs): + async def arun( + self, + task: str, + audio: Optional[str] = None, + img: Optional[str] = None, + *args, + **kwargs + ): """ Run the LLM model asynchronously for the given task. Args: task (str): The task to run the model for. + audio (str, optional): Audio input if any. Defaults to None. + img (str, optional): Image input if any. Defaults to None. *args: Additional positional arguments. **kwargs: Additional keyword arguments. @@ -584,9 +568,9 @@ class LiteLLM: str: The content of the response from the model. """ try: - messages = self._prepare_messages(task) + messages = self._prepare_messages(task=task, img=img) - # Prepare common completion parameters + # Base completion parameters completion_params = { "model": self.model_name, "messages": messages, @@ -698,31 +682,3 @@ class LiteLLM: f"Running {len(tasks)} tasks asynchronously in batches of {batch_size}" ) return await self._process_batch(tasks, batch_size) - - def get_vision_processing_info(self, image: str) -> dict: - """ - Get information about how the image will be processed for this model. - - This utility method helps users understand whether their image will be: - - Passed directly as URL (more efficient) - - Converted to base64 (fallback for unsupported models/local files) - - Args: - image (str): The image source (URL or file path) - - Returns: - dict: Information about the processing approach - """ - return { - "model_name": self.model_name, - "image_source": image, - "is_url": image.startswith(("http://", "https://")), - "is_local_file": not image.startswith(("http://", "https://", "data:")), - "will_use_direct_url": self._should_use_direct_url(image), - "supports_vision": supports_vision(model=self.model_name), - "processing_method": "direct_url" if self._should_use_direct_url(image) else "base64_conversion", - "benefits": { - "direct_url": "No server bandwidth/CPU usage for image processing", - "base64_conversion": "Works with local files and all model types" - } - }