Merge pull request #234 from elder-plinius/master

idea2img file and prompt, plus stablediffusion class fix
2 years ago · e09001f19d
parent a8449d2625 82137692c2
commit e09001f19d
3 changed files with 101 additions and 0 deletions
--- a/playground/demos/idea_2_img/idea2img.py
+++ b/playground/demos/idea_2_img/idea2img.py
@ -0,0 +1,74 @@
+import os
+import datetime
+from dotenv import load_dotenv
+from swarms.models.stable_diffusion import StableDiffusion
+from swarms.models.gpt4_vision_api import GPT4VisionAPI
+from swarms.models import OpenAIChat
+from swarms.structs import Agent
+
+# Pull credentials from a local .env file (if any) into the process environment,
+# then read both API keys in one pass. A missing key yields None.
+load_dotenv()
+openai_api_key, stability_api_key = (
+    os.environ.get(key_name)
+    for key_name in ("OPENAI_API_KEY", "STABILITY_API_KEY")
+)
+
+# Shared model clients used by the agent and the script below:
+#   vision_api - analyses generated images
+#   sd_api     - generates images from text prompts
+#   gpt_api    - enriches the user's initial prompt
+vision_api = GPT4VisionAPI(api_key=openai_api_key)
+sd_api = StableDiffusion(api_key=stability_api_key)
+gpt_api = OpenAIChat(openai_api_key=openai_api_key)
+
+class Idea2Image(Agent):
+    """Agent that iteratively refines an image-generation prompt.
+
+    Each iteration generates an image from the current prompt, asks the
+    vision model to analyse that image, and appends the analysis to the
+    prompt used for the next iteration.
+
+    Image generation goes through the module-level ``sd_api`` client.
+
+    Args:
+        llm: Language model used to enrich the initial prompt. It is handed
+            to ``Agent.__init__`` and later called as
+            ``self.llm.generate([text])``.
+        vision_api: Client used to analyse generated images; called as
+            ``vision_api.run(img, prompt)``.
+    """
+
+    def __init__(self, llm, vision_api):
+        super().__init__(llm=llm)
+        self.vision_api = vision_api
+
+    def run(self, initial_prompt, num_iterations, run_folder):
+        """Generate and analyse images for ``num_iterations`` rounds.
+
+        Args:
+            initial_prompt: The user's starting prompt. It is enriched once,
+                before the first image is generated.
+            num_iterations: Number of generate/analyse rounds to attempt.
+            run_folder: Folder that receives the generated images.
+
+        Returns:
+            None. Progress is reported with ``print``. The loop stops early
+            if an image cannot be generated.
+        """
+        current_prompt = initial_prompt
+
+        for i in range(num_iterations):
+            print(f"Iteration {i}: Image generation and analysis")
+
+            if i == 0:
+                enriched = self.enrich_prompt(current_prompt)
+                if enriched:
+                    current_prompt = enriched
+                    print(f"Enriched Prompt: {current_prompt}")
+                else:
+                    # enrich_prompt returns None when the LLM produced
+                    # nothing; keep the user's prompt instead of sending
+                    # None to the image generator.
+                    print("Prompt enrichment returned nothing; using the original prompt")
+
+            img = sd_api.generate_and_move_image(current_prompt, i, run_folder)
+            if not img:
+                print("Failed to generate image")
+                break
+            print(f"Generated image at: {img}")
+
+            # NOTE(review): confirm that GPT4VisionAPI.run expects the image
+            # path first and the text second, as passed here.
+            analysis = self.vision_api.run(img, current_prompt)
+            if analysis:
+                # Only the first 500 characters of the analysis are appended,
+                # but the prompt still grows on every iteration.
+                current_prompt += ". " + analysis[:500]  # Ensure the analysis is concise
+                print(f"Image Analysis: {analysis}")
+            else:
+                print(f"Failed to analyze image at: {img}")
+
+    def enrich_prompt(self, prompt):
+        """Ask the LLM to rewrite ``prompt`` as a richer image prompt.
+
+        Args:
+            prompt: The starting prompt to improve.
+
+        Returns:
+            The first generation's text, truncated to 500 characters, or
+            None when the LLM returned no generations.
+        """
+        enrichment_task = (
+            "Create a concise and effective image generation prompt within 400 characters or less, "
+            "based on Stable Diffusion and Dalle best practices. Starting prompt: \n\n'"
+            f"{prompt}'\n\n"
+            "Improve the prompt with any applicable details or keywords by considering the following aspects: \n"
+            "1. Subject details (like actions, emotions, environment) \n"
+            "2. Artistic style (such as surrealism, hyperrealism) \n"
+            "3. Medium (digital painting, oil on canvas) \n"
+            "4. Color themes and lighting (like warm colors, cinematic lighting) \n"
+            "5. Composition and framing (close-up, wide-angle) \n"
+            "6. Additional elements (like a specific type of background, weather conditions) \n"
+            "7. Any other artistic or thematic details that can make the image more vivid and compelling."
+        )
+        llm_result = self.llm.generate([enrichment_task])
+        generations = llm_result.generations
+        # Guard both levels: an empty outer list and an empty first entry.
+        if not generations or not generations[0]:
+            return None
+        return generations[0][0].text[:500]
+
+# User input and setup
+user_prompt = input("Prompt for image generation: ")
+try:
+    num_iterations = int(input("Enter the number of iterations for image improvement: "))
+except ValueError:
+    # Exit with a readable message instead of a traceback on non-numeric input.
+    raise SystemExit("The number of iterations must be a whole number.") from None
+if num_iterations < 1:
+    # range(num_iterations) would be empty, so nothing would be generated.
+    raise SystemExit("The number of iterations must be at least 1.")
+
+# Each run gets its own timestamped folder under ./runs for its images.
+run_folder = os.path.join("runs", datetime.datetime.now().strftime("%Y%m%d_%H%M%S"))
+os.makedirs(run_folder, exist_ok=True)
+
+# Initialize and run the agent
+idea2image_agent = Idea2Image(gpt_api, vision_api)
+idea2image_agent.run(user_prompt, num_iterations, run_folder)
+
+print("Image improvement process completed.")
--- a/swarms/models/stable_diffusion.py
+++ b/swarms/models/stable_diffusion.py
@ -2,6 +2,7 @@ import base64
 import os
 import requests
 import uuid
+import shutil
 from dotenv import load_dotenv
 from typing import List

@ -135,3 +136,15 @@ class StableDiffusion:
            image_paths.append(image_path)

        return image_paths
+
+    def generate_and_move_image(self, prompt, iteration, folder_path):
+        """Generate an image for ``prompt`` and move it into ``folder_path``.
+
+        Only the first path returned by ``self.run`` is moved; any further
+        paths are left where ``self.run`` wrote them.
+
+        Args:
+            prompt: Text prompt passed to ``self.run``.
+            iteration: Value embedded in the destination file name
+                (``image_<iteration>.jpg``).
+            folder_path: Destination folder. It is created if it does not
+                exist yet.
+
+        Returns:
+            The destination path of the moved image, or None when
+            ``self.run`` returned no image paths.
+        """
+        # Generate the image
+        image_paths = self.run(prompt)
+        if not image_paths:
+            return None
+
+        # shutil.move does not create missing parent directories, so make
+        # sure the destination exists. An empty folder_path means the
+        # current directory, which os.makedirs would reject.
+        if folder_path:
+            os.makedirs(folder_path, exist_ok=True)
+
+        # Move the image to the specified folder
+        # NOTE(review): the destination always gets a ".jpg" suffix regardless
+        # of the source file's extension - confirm the format self.run writes.
+        src_image_path = image_paths[0]
+        dst_image_path = os.path.join(folder_path, f"image_{iteration}.jpg")
+        shutil.move(src_image_path, dst_image_path)
+        return dst_image_path
--- a/swarms/prompts/idea2img.py
+++ b/swarms/prompts/idea2img.py
@ -0,0 +1,14 @@
+# Template that asks an LLM to enrich an image-generation prompt.
+# It contains a single ``{prompt}`` placeholder; fill it with
+# ``IMAGE_ENRICHMENT_PROMPT.format(prompt=user_prompt)`` before sending.
+IMAGE_ENRICHMENT_PROMPT = (
+    "Create a concise and effective image generation prompt within 400 characters or less, "
+    "based on Stable Diffusion and Dalle best practices. Starting prompt: \n\n'"
+    "{prompt}'\n\n"
+    "Improve the prompt with any applicable details or keywords by considering the following aspects: \n"
+    "1. Subject details (like actions, emotions, environment) \n"
+    "2. Artistic style (such as surrealism, hyperrealism) \n"
+    "3. Medium (digital painting, oil on canvas) \n"
+    "4. Color themes and lighting (like warm colors, cinematic lighting) \n"
+    "5. Composition and framing (close-up, wide-angle) \n"
+    "6. Additional elements (like a specific type of background, weather conditions) \n"
+    "7. Any other artistic or thematic details that can make the image more vivid and compelling."
+)
+