# Patch 1/3 ("Create idea2img.py"), commit 9eabc91dfae6ad76d7b853e1dc64304ecc59507f,
# authored by pliny on Thu, 30 Nov 2023 18:57:01 -0800.
# Adds new file playground/demos/idea_2_img/idea2img.py:
"""Idea-to-image demo.

Enriches a user prompt with an LLM, renders it through the Stable Diffusion
client, asks the vision model to analyze the rendered image, and appends that
analysis to the prompt used for the next render.
"""
import os
import datetime
from dotenv import load_dotenv
from swarms.models.stable_diffusion import StableDiffusion
from swarms.models.gpt4_vision_api import GPT4VisionAPI
from swarms.models import OpenAIChat
from swarms.structs import Agent

# Load environment variables
load_dotenv()
openai_api_key = os.getenv("OPENAI_API_KEY")
stability_api_key = os.getenv("STABILITY_API_KEY")

# Initialize the models
vision_api = GPT4VisionAPI(api_key=openai_api_key)
sd_api = StableDiffusion(api_key=stability_api_key)
gpt_api = OpenAIChat(openai_api_key=openai_api_key)

# Cap applied to the enriched prompt and to each analysis appended to it.
MAX_TEXT_CHARS = 500


class Idea2Image(Agent):
    """Agent that improves an image over several generate/analyze rounds."""

    def __init__(self, llm, vision_api, image_api=None):
        """Store the collaborating models.

        Args:
            llm: Language model handed to ``Agent`` and used by
                ``enrich_prompt``.
            vision_api: Object whose ``run(img, prompt)`` returns a textual
                analysis of an image.
            image_api: Object providing
                ``generate_and_move_image(prompt, iteration, folder)``.
                Defaults to the module-level ``sd_api`` so existing
                two-argument construction keeps working.
        """
        super().__init__(llm=llm)
        self.vision_api = vision_api
        self.image_api = image_api if image_api is not None else sd_api

    def run(self, initial_prompt, num_iterations, run_folder):
        """Generate and analyze up to ``num_iterations`` images.

        The prompt is enriched once, before the first image. After each
        image, the first ``MAX_TEXT_CHARS`` characters of its analysis are
        appended to the prompt. The loop stops early when image generation
        fails.

        Args:
            initial_prompt: The user's starting idea.
            num_iterations: Maximum number of images to generate.
            run_folder: Directory handed to the image client for its output.
        """
        current_prompt = initial_prompt

        for i in range(num_iterations):
            print(f"Iteration {i}: Image generation and analysis")

            if i == 0:
                enriched = self.enrich_prompt(current_prompt)
                if enriched:
                    current_prompt = enriched
                    print(f"Enriched Prompt: {current_prompt}")
                else:
                    # enrich_prompt returns None when the LLM yields nothing;
                    # keep the user's prompt rather than propagating None.
                    print("Prompt enrichment returned nothing; using the original prompt")

            img = self.image_api.generate_and_move_image(current_prompt, i, run_folder)
            if not img:
                print("Failed to generate image")
                break
            print(f"Generated image at: {img}")

            # NOTE(review): arguments are passed as (image path, prompt);
            # confirm this matches the parameter order of GPT4VisionAPI.run.
            analysis = self.vision_api.run(img, current_prompt)
            if analysis:
                # Ensure the analysis is concise.
                # NOTE(review): the prompt still grows every round; confirm
                # the image API accepts prompts of the resulting length.
                current_prompt += ". " + analysis[:MAX_TEXT_CHARS]
                print(f"Image Analysis: {analysis}")
            else:
                print(f"Failed to analyze image at: {img}")

    def enrich_prompt(self, prompt):
        """Ask the LLM to expand ``prompt`` into a detailed image prompt.

        Returns:
            The first generation's text truncated to ``MAX_TEXT_CHARS``
            characters, or ``None`` when the LLM returned no generations.
        """
        enrichment_task = (
            "Create a concise and effective image generation prompt within 400 characters or less, "
            "based on Stable Diffusion and Dalle best practices. Starting prompt: \n\n'"
            f"{prompt}'\n\n"
            "Improve the prompt with any applicable details or keywords by considering the following aspects: \n"
            "1. Subject details (like actions, emotions, environment) \n"
            "2. Artistic style (such as surrealism, hyperrealism) \n"
            "3. Medium (digital painting, oil on canvas) \n"
            "4. Color themes and lighting (like warm colors, cinematic lighting) \n"
            "5. Composition and framing (close-up, wide-angle) \n"
            "6. Additional elements (like a specific type of background, weather conditions) \n"
            "7. Any other artistic or thematic details that can make the image more vivid and compelling."
        )
        llm_result = self.llm.generate([enrichment_task])
        generations = llm_result.generations
        # Guard both levels: an empty outer or inner list means no text.
        if not generations or not generations[0]:
            return None
        return generations[0][0].text[:MAX_TEXT_CHARS]


def main():
    """Prompt for an idea and an iteration count, then run the agent."""
    # User input and setup
    user_prompt = input("Prompt for image generation: ")
    raw_count = input("Enter the number of iterations for image improvement: ")
    try:
        num_iterations = int(raw_count)
    except ValueError:
        raise SystemExit(
            f"Number of iterations must be an integer, got {raw_count!r}"
        ) from None
    if num_iterations < 1:
        # Validate before creating the run folder so no empty folder is left.
        raise SystemExit("Number of iterations must be at least 1")

    run_folder = os.path.join("runs", datetime.datetime.now().strftime("%Y%m%d_%H%M%S"))
    os.makedirs(run_folder, exist_ok=True)

    # Initialize and run the agent
    idea2image_agent = Idea2Image(gpt_api, vision_api)
    idea2image_agent.run(user_prompt, num_iterations, run_folder)

    print("Image improvement process completed.")


# Guarded so importing this module does not block on input().
if __name__ == "__main__":
    main()

# Next in the series: patch 2/3 ("Create idea2img.py"),
# commit b382d6b6fc0fc9328a0ced4335176e87fa764b00,
# authored by pliny on Thu, 30 Nov 2023 18:57:59 -0800;
# creates swarms/prompts/idea2img.py (14 insertions).
# swarms/prompts/idea2img.py (new file added by patch 2/3, blob 9fc0f3fa).

# Template asking an LLM to enrich a short idea into a full image-generation
# prompt. Insert the idea with IMAGE_ENRICHMENT_PROMPT.format(prompt=idea);
# the template contains no other braces.
IMAGE_ENRICHMENT_PROMPT = (
    "Create a concise and effective image generation prompt within 400 characters or less, "
    "based on Stable Diffusion and Dalle best practices. Starting prompt: \n\n'"
    "{prompt}'\n\n"
    "Improve the prompt with any applicable details or keywords by considering the following aspects: \n"
    "1. Subject details (like actions, emotions, environment) \n"
    "2. Artistic style (such as surrealism, hyperrealism) \n"
    "3. Medium (digital painting, oil on canvas) \n"
    "4. Color themes and lighting (like warm colors, cinematic lighting) \n"
    "5. Composition and framing (close-up, wide-angle) \n"
    "6. Additional elements (like a specific type of background, weather conditions) \n"
    "7. Any other artistic or thematic details that can make the image more vivid and compelling."
)

# Patch 3/3 ("Update stable_diffusion.py"),
# commit 82137692c24f1e4a95169b218b5fa0d420c00e55,
# authored by pliny on Thu, 30 Nov 2023 18:58:53 -0800.
# Modifies swarms/models/stable_diffusion.py (0ba9b463..78d417a9, 13 insertions):
#   * adds `import shutil` after the existing `import uuid` (the module's
#     other imports shown as context: base64, os, requests,
#     dotenv.load_dotenv, typing.List);
#   * appends the method below to `class StableDiffusion`, after the existing
#     `return image_paths` line.
import os
import shutil


# Method of `StableDiffusion` (per the hunk header). Shown dedented because
# the class header lies outside this excerpt.
def generate_and_move_image(self, prompt, iteration, folder_path):
    """Generate an image for ``prompt`` and move it into ``folder_path``.

    Args:
        prompt: Text passed to ``self.run``.
        iteration: Value embedded in the destination name
            (``image_<iteration>``).
        folder_path: Destination directory; created if it does not exist.

    Returns:
        The destination path of the first generated image, or ``None`` when
        ``self.run`` returns nothing.
    """
    # Generate the image
    image_paths = self.run(prompt)
    if not image_paths:
        return None

    # Move the image to the specified folder
    src_image_path = image_paths[0]
    # Keep the generated file's own extension so the name matches its
    # contents; fall back to ".jpg" when the source has none.
    extension = os.path.splitext(src_image_path)[1] or ".jpg"
    # shutil.move fails if the destination folder is missing.
    os.makedirs(folder_path, exist_ok=True)
    dst_image_path = os.path.join(folder_path, f"image_{iteration}{extension}")
    shutil.move(src_image_path, dst_image_path)
    return dst_image_path