Merge pull request #234 from elder-plinius/master

idea2img file and prompt, plus stablediffusion class fix
1 year ago · e09001f19d
parent a8449d2625 82137692c2
commit e09001f19d
3 changed files with 101 additions and 0 deletions
--- a/playground/demos/idea_2_img/idea2img.py
+++ b/playground/demos/idea_2_img/idea2img.py
@ -0,0 +1,74 @@
 import os
 import datetime
 from dotenv import load_dotenv
 from swarms.models.stable_diffusion import StableDiffusion
 from swarms.models.gpt4_vision_api import GPT4VisionAPI
 from swarms.models import OpenAIChat
 from swarms.structs import Agent
 # Run dotenv loading first, then read the two API keys from the process
 # environment. A key that is not set comes back as None.
 load_dotenv()
 openai_api_key = os.environ.get("OPENAI_API_KEY")
 stability_api_key = os.environ.get("STABILITY_API_KEY")
 # Shared model clients for this script: sd_api is used by Idea2Image.run,
 # while gpt_api and vision_api are handed to the agent when it is created.
 vision_api = GPT4VisionAPI(api_key=openai_api_key)
 sd_api = StableDiffusion(api_key=stability_api_key)
 gpt_api = OpenAIChat(openai_api_key=openai_api_key)
 class Idea2Image(Agent):
    """Agent that iteratively refines an image starting from a text idea.

    Each round generates an image from the current prompt, asks the vision
    model to analyse it, and appends (part of) that analysis to the prompt
    used for the next round.
    """

    def __init__(self, llm, vision_api, image_api=None):
        """Store the collaborating models.

        Args:
            llm: Language model forwarded to ``Agent``; ``enrich_prompt``
                calls its ``generate`` method.
            vision_api: Object whose ``run(image_path, prompt)`` is expected
                to return a textual analysis of the image.
            image_api: Optional image generator exposing
                ``generate_and_move_image(prompt, iteration, folder)``.
                When omitted, the module-level ``sd_api`` is used.
        """
        super().__init__(llm=llm)
        self.vision_api = vision_api
        self.image_api = image_api

    def run(self, initial_prompt, num_iterations, run_folder):
        """Generate and refine an image for up to ``num_iterations`` rounds.

        Args:
            initial_prompt: The starting idea; it is enriched by the LLM on
                the first round.
            num_iterations: Maximum number of generate/analyse rounds.
            run_folder: Folder passed to the image generator as the
                destination for each round's image.

        The loop stops early when an image cannot be generated. Progress is
        reported with ``print``; nothing is returned.
        """
        # Fall back to the module-level client so callers that do not inject
        # an image generator keep working unchanged.
        image_api = getattr(self, "image_api", None)
        if image_api is None:
            image_api = sd_api
        current_prompt = initial_prompt
        for i in range(num_iterations):
            print(f"Iteration {i}: Image generation and analysis")
            if i == 0:
                enriched = self.enrich_prompt(current_prompt)
                if enriched:
                    current_prompt = enriched
                    print(f"Enriched Prompt: {current_prompt}")
                else:
                    # enrich_prompt yields None when the LLM returns nothing;
                    # keep the user's prompt instead of passing None onwards
                    # (which would also break the "+=" further down).
                    print("Prompt enrichment failed; using the original prompt")
            img = image_api.generate_and_move_image(current_prompt, i, run_folder)
            if not img:
                print("Failed to generate image")
                break
            print(f"Generated image at: {img}")
            analysis = self.vision_api.run(img, current_prompt)
            if analysis:
                # Only the first 500 characters of the analysis are carried
                # into the next prompt to keep it concise.
                current_prompt += ". " + analysis[:500]
                print(f"Image Analysis: {analysis}")
            else:
                print(f"Failed to analyze image at: {img}")

    def enrich_prompt(self, prompt):
        """Ask the LLM to expand ``prompt`` into a richer image prompt.

        Args:
            prompt: The starting prompt embedded in the enrichment request.

        Returns:
            The text of the first generation truncated to 500 characters, or
            ``None`` when the LLM result contains no generation.
        """
        enrichment_task = (
            "Create a concise and effective image generation prompt within 400 characters or less, "
            "based on Stable Diffusion and Dalle best practices. Starting prompt: \n\n'"
            f"{prompt}'\n\n"
            "Improve the prompt with any applicable details or keywords by considering the following aspects: \n"
            "1. Subject details (like actions, emotions, environment) \n"
            "2. Artistic style (such as surrealism, hyperrealism) \n"
            "3. Medium (digital painting, oil on canvas) \n"
            "4. Color themes and lighting (like warm colors, cinematic lighting) \n"
            "5. Composition and framing (close-up, wide-angle) \n"
            "6. Additional elements (like a specific type of background, weather conditions) \n"
            "7. Any other artistic or thematic details that can make the image more vivid and compelling."
        )
        llm_result = self.llm.generate([enrichment_task])
        generations = llm_result.generations
        # Guard both levels: an empty outer list and an empty first batch.
        if not generations or not generations[0]:
            return None
        return generations[0][0].text[:500]
 # User input and setup
 if __name__ == "__main__":
    # Guarded so that importing this module (for example to reuse
    # Idea2Image) does not block on input() or start generating images.
    user_prompt = input("Prompt for image generation: ")
    iterations_text = input("Enter the number of iterations for image improvement: ")
    try:
        num_iterations = int(iterations_text)
    except ValueError:
        # Exit with a readable message instead of an uncaught traceback.
        raise SystemExit(
            f"Number of iterations must be an integer, got {iterations_text!r}"
        ) from None
    # Each run gets its own timestamped folder under "runs".
    run_folder = os.path.join("runs", datetime.datetime.now().strftime("%Y%m%d_%H%M%S"))
    os.makedirs(run_folder, exist_ok=True)
    # Initialize and run the agent
    idea2image_agent = Idea2Image(gpt_api, vision_api)
    idea2image_agent.run(user_prompt, num_iterations, run_folder)
    print("Image improvement process completed.")
--- a/swarms/models/stable_diffusion.py
+++ b/swarms/models/stable_diffusion.py
@ -2,6 +2,7 @@ import base64
 import os
 import requests
 import uuid
 import shutil
 from dotenv import load_dotenv
 from typing import List
@ -135,3 +136,15 @@ class StableDiffusion:
            image_paths.append(image_path)
        return image_paths
    def generate_and_move_image(self, prompt, iteration, folder_path):
        """Generate an image for ``prompt`` and move it into ``folder_path``.

        Args:
            prompt: Text prompt forwarded to ``self.run``.
            iteration: Value embedded in the destination file name
                (``image_{iteration}.jpg``).
            folder_path: Destination directory; created when it does not
                exist yet.

        Returns:
            The destination path of the moved image, or ``None`` when
            ``self.run`` produced no image paths.
        """
        # Generate the image
        image_paths = self.run(prompt)
        if not image_paths:
            return None
        # Only the first generated image is moved; any further paths returned
        # by ``run`` are left where they are.
        src_image_path = image_paths[0]
        # Create the target directory up front so the move does not fail just
        # because the caller has not made it yet. An empty folder_path means
        # "current directory" for os.path.join, so there is nothing to create.
        if folder_path:
            os.makedirs(folder_path, exist_ok=True)
        # NOTE(review): the ".jpg" suffix is applied whatever format ``run``
        # wrote - confirm it matches the generated files.
        dst_image_path = os.path.join(folder_path, f"image_{iteration}.jpg")
        shutil.move(src_image_path, dst_image_path)
        return dst_image_path
--- a/swarms/prompts/idea2img.py
+++ b/swarms/prompts/idea2img.py
@ -0,0 +1,14 @@
 # Template for asking an LLM to enrich an image-generation prompt.
 # The user's starting idea goes into the {prompt} placeholder, e.g.
 # IMAGE_ENRICHMENT_PROMPT.format(prompt="a red fox in the snow").
 IMAGE_ENRICHMENT_PROMPT = (
    "Create a concise and effective image generation prompt within 400 characters or less, "
    "based on Stable Diffusion and Dalle best practices. Starting prompt: \n\n'"
    # Placeholder instead of an f-string: a module-level constant has no
    # ``prompt`` variable to interpolate at import time.
    "{prompt}'\n\n"
    "Improve the prompt with any applicable details or keywords by considering the following aspects: \n"
    "1. Subject details (like actions, emotions, environment) \n"
    "2. Artistic style (such as surrealism, hyperrealism) \n"
    "3. Medium (digital painting, oil on canvas) \n"
    "4. Color themes and lighting (like warm colors, cinematic lighting) \n"
    "5. Composition and framing (close-up, wide-angle) \n"
    "6. Additional elements (like a specific type of background, weather conditions) \n"
    "7. Any other artistic or thematic details that can make the image more vivid and compelling."
 )