diff --git a/playground/demos/idea_2_img/idea2img.py b/playground/demos/idea_2_img/idea2img.py new file mode 100644 index 00000000..185421cd --- /dev/null +++ b/playground/demos/idea_2_img/idea2img.py @@ -0,0 +1,74 @@ +import os +import datetime +from dotenv import load_dotenv +from swarms.models.stable_diffusion import StableDiffusion +from swarms.models.gpt4_vision_api import GPT4VisionAPI +from swarms.models import OpenAIChat +from swarms.structs import Agent + +# Load environment variables +load_dotenv() +openai_api_key = os.getenv("OPENAI_API_KEY") +stability_api_key = os.getenv("STABILITY_API_KEY") + +# Initialize the models +vision_api = GPT4VisionAPI(api_key=openai_api_key) +sd_api = StableDiffusion(api_key=stability_api_key) +gpt_api = OpenAIChat(openai_api_key=openai_api_key) + +class Idea2Image(Agent): + def __init__(self, llm, vision_api): + super().__init__(llm=llm) + self.vision_api = vision_api + + def run(self, initial_prompt, num_iterations, run_folder): + current_prompt = initial_prompt + + for i in range(num_iterations): + print(f"Iteration {i}: Image generation and analysis") + + if i == 0: + current_prompt = self.enrich_prompt(current_prompt) + print(f"Enriched Prompt: {current_prompt}") + + img = sd_api.generate_and_move_image(current_prompt, i, run_folder) + if not img: + print("Failed to generate image") + break + print(f"Generated image at: {img}") + + analysis = self.vision_api.run(img, current_prompt) if img else None + if analysis: + current_prompt += ". " + analysis[:500] # Ensure the analysis is concise + print(f"Image Analysis: {analysis}") + else: + print(f"Failed to analyze image at: {img}") + + def enrich_prompt(self, prompt): + enrichment_task = ( + "Create a concise and effective image generation prompt within 400 characters or less, " + "based on Stable Diffusion and Dalle best practices. Starting prompt: \n\n'" + f"{prompt}'\n\n" + "Improve the prompt with any applicable details or keywords by considering the following aspects: \n" + "1. Subject details (like actions, emotions, environment) \n" + "2. Artistic style (such as surrealism, hyperrealism) \n" + "3. Medium (digital painting, oil on canvas) \n" + "4. Color themes and lighting (like warm colors, cinematic lighting) \n" + "5. Composition and framing (close-up, wide-angle) \n" + "6. Additional elements (like a specific type of background, weather conditions) \n" + "7. Any other artistic or thematic details that can make the image more vivid and compelling." + ) + llm_result = self.llm.generate([enrichment_task]) + return llm_result.generations[0][0].text[:500] if llm_result.generations else None + +# User input and setup +user_prompt = input("Prompt for image generation: ") +num_iterations = int(input("Enter the number of iterations for image improvement: ")) +run_folder = os.path.join("runs", datetime.datetime.now().strftime("%Y%m%d_%H%M%S")) +os.makedirs(run_folder, exist_ok=True) + +# Initialize and run the agent +idea2image_agent = Idea2Image(gpt_api, vision_api) +idea2image_agent.run(user_prompt, num_iterations, run_folder) + +print("Image improvement process completed.")