Former-commit-id: 583d9384edde63090c59f56ac050ff984bed0178
pull/160/head
Kye 2 years ago
parent 6b2179e9a5
commit cd804d7c17

@ -1300,6 +1300,56 @@ class VisualQuestionAnswering(BaseToolSet):
        return answer
#========================> handlers/image
import torch
from PIL import Image
from transformers import BlipForConditionalGeneration, BlipProcessor
# from core.prompts.file import IMAGE_PROMPT
from swarms.prompts.prompts import IMAGE_PROMPT
from swarms.utils.utils import BaseHandler
class ImageCaptioning(BaseHandler):
    def __init__(self, device):
        print("Initializing ImageCaptioning to %s" % device)
        self.device = device
        self.torch_dtype = torch.float16 if "cuda" in device else torch.float32
        self.processor = BlipProcessor.from_pretrained(
            "Salesforce/blip-image-captioning-base"
        )
        self.model = BlipForConditionalGeneration.from_pretrained(
            "Salesforce/blip-image-captioning-base", torch_dtype=self.torch_dtype
        ).to(self.device)

    def handle(self, filename: str):
        # Downscale so the image fits within 512x512, then re-save in place as PNG.
        img = Image.open(filename)
        width, height = img.size
        ratio = min(512 / width, 512 / height)
        width_new, height_new = (round(width * ratio), round(height * ratio))
        img = img.resize((width_new, height_new))
        img = img.convert("RGB")
        img.save(filename, "PNG")
        print(f"Resized image from {width}x{height} to {width_new}x{height_new}")

        inputs = self.processor(Image.open(filename), return_tensors="pt").to(
            self.device, self.torch_dtype
        )
        out = self.model.generate(**inputs)
        description = self.processor.decode(out[0], skip_special_tokens=True)
        print(
            f"\nProcessed ImageCaptioning, Input Image: {filename}, Output Text: {description}"
        )
        return IMAGE_PROMPT.format(filename=filename, description=description)
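A minimal usage sketch for the handler above (an editor's illustration, not part of the commit); the file path is hypothetical and the device choice mirrors the dtype logic in __init__:

import torch

device = "cuda" if torch.cuda.is_available() else "cpu"
captioner = ImageCaptioning(device)
# handle() resizes the file in place, captions it with BLIP, and returns
# IMAGE_PROMPT filled in with the filename and caption.
print(captioner.handle("photo.png"))  # "photo.png" is a hypothetical path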
#segment anything:
########################### MODELS

@ -225,7 +225,7 @@ class AbstractUploader(ABC):
#========================= upload s3
import os
import boto3
@ -262,7 +262,6 @@ class S3Uploader(AbstractUploader):
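The S3Uploader body is elided by this hunk; as a hedged sketch, a boto3-backed upload typically looks like the following (the bucket, key, and URL scheme are illustrative assumptions, not taken from the class):

import boto3

def upload_to_s3(filepath: str, bucket: str, key: str) -> str:
    # boto3's managed transfer; handles multipart uploads for large files.
    s3 = boto3.client("s3")
    s3.upload_file(filepath, bucket, key)
    # Assumes a public bucket in the default URL layout.
    return f"https://{bucket}.s3.amazonaws.com/{key}"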
#========================= upload s3
#========================> upload/static
import os
import shutil
from pathlib import Path
@ -291,11 +290,9 @@ class StaticUploader(AbstractUploader):
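StaticUploader's body is likewise elided; a hedged sketch of a static-directory upload built on the shutil/Path imports above (the directory and URL layout are assumptions):

import shutil
from pathlib import Path

def upload_static(filepath: str, static_dir: str, server_url: str) -> str:
    # Copy the file under the served static root and return its public URL.
    dest = Path(static_dir) / Path(filepath).name
    dest.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy(filepath, dest)
    return f"{server_url}/{dest.name}"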
#========================> handlers/base
import os
import shutil
import uuid
from enum import Enum
from pathlib import Path
from typing import Dict
import requests
@ -402,7 +399,6 @@ class FileHandler:
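FileHandler's body is elided; given the requests/uuid/Path imports above, a hedged sketch of the download step such a handler needs (the filename scheme is an assumption):

import uuid
from pathlib import Path

import requests

def download_file(url: str, save_dir: str = ".") -> str:
    response = requests.get(url, timeout=30)
    response.raise_for_status()  # surface HTTP errors instead of saving them
    # Name the local copy with a UUID, keeping the URL's file extension.
    local_path = Path(save_dir) / f"{uuid.uuid4().hex}{Path(url).suffix}"
    local_path.write_bytes(response.content)
    return str(local_path)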
#############===========================>
import pandas as pd
from swarms.prompts.prompts import DATAFRAME_PROMPT
@ -425,77 +421,3 @@ class CsvToDataframe(BaseHandler):
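The CsvToDataframe body is removed by this hunk; a hedged sketch of the pattern, assuming DATAFRAME_PROMPT takes filename/description fields the way IMAGE_PROMPT does above:

import pandas as pd

class CsvToDataframeSketch(BaseHandler):
    def handle(self, filename: str):
        df = pd.read_csv(filename)
        description = str(df.describe())  # summary statistics as plain text
        # The format fields are assumed, mirroring IMAGE_PROMPT above.
        return DATAFRAME_PROMPT.format(filename=filename, description=description)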
#========================> handlers/image
import torch
from PIL import Image
from transformers import BlipForConditionalGeneration, BlipProcessor
# from core.prompts.file import IMAGE_PROMPT
from swarms.prompts.prompts import IMAGE_PROMPT
class ImageCaptioning(BaseHandler):
    def __init__(self, device):
        print("Initializing ImageCaptioning to %s" % device)
        self.device = device
        self.torch_dtype = torch.float16 if "cuda" in device else torch.float32
        self.processor = BlipProcessor.from_pretrained(
            "Salesforce/blip-image-captioning-base"
        )
        self.model = BlipForConditionalGeneration.from_pretrained(
            "Salesforce/blip-image-captioning-base", torch_dtype=self.torch_dtype
        ).to(self.device)

    def handle(self, filename: str):
        # Downscale so the image fits within 512x512, then re-save in place as PNG.
        img = Image.open(filename)
        width, height = img.size
        ratio = min(512 / width, 512 / height)
        width_new, height_new = (round(width * ratio), round(height * ratio))
        img = img.resize((width_new, height_new))
        img = img.convert("RGB")
        img.save(filename, "PNG")
        print(f"Resized image from {width}x{height} to {width_new}x{height_new}")

        inputs = self.processor(Image.open(filename), return_tensors="pt").to(
            self.device, self.torch_dtype
        )
        out = self.model.generate(**inputs)
        description = self.processor.decode(out[0], skip_special_tokens=True)
        print(
            f"\nProcessed ImageCaptioning, Input Image: {filename}, Output Text: {description}"
        )
        return IMAGE_PROMPT.format(filename=filename, description=description)
# from autogpt.agent import Agent
# from swarms.agents.swarms import worker_node

# class MultiAgent(worker_node):
#     def __init__(
#         self,
#         ai_name,
#         memory,
#         full_message_history,
#         prompt,
#         user_input,
#         agent_id
#     ):
#         super().__init__(
#             ai_name=ai_name,
#             memory=memory,
#             full_message_history=full_message_history,
#             next_action_count=0,
#             prompt=prompt,
#             user_input=user_input,
#         )
#         self.agent_id = agent_id
#         self.auditory_buffer = []  # the unprocessed parts of the conversation
#
#     def receive_message(self, speaker, message):
#         self.auditory_buffer.append((speaker.ai_name, message))
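The commented-out MultiAgent hints at a buffered message-passing pattern; a minimal hedged sketch of that pattern on its own (class and method names are illustrative, not from the source):

class MessageBuffer:
    def __init__(self):
        self.auditory_buffer = []  # (speaker_name, message) pairs awaiting processing

    def receive_message(self, speaker_name: str, message: str):
        self.auditory_buffer.append((speaker_name, message))

    def drain(self):
        # Hand back everything received so far and clear the buffer.
        pending, self.auditory_buffer = self.auditory_buffer, []
        return pending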