Merge pull request #234 from elder-plinius/master
idea2img file and prompt, plus stablediffusion class fix
pull/243/head
commit
e09001f19d
@ -0,0 +1,74 @@
|
||||
import os
|
||||
import datetime
|
||||
from dotenv import load_dotenv
|
||||
from swarms.models.stable_diffusion import StableDiffusion
|
||||
from swarms.models.gpt4_vision_api import GPT4VisionAPI
|
||||
from swarms.models import OpenAIChat
|
||||
from swarms.structs import Agent
|
||||
|
||||
# Credentials come from the process environment; load_dotenv() runs first so
# the lookups below can see whatever it loaded. Either key is None when unset.
load_dotenv()
openai_api_key = os.environ.get("OPENAI_API_KEY")
stability_api_key = os.environ.get("STABILITY_API_KEY")

# Module-level model clients shared by the rest of this script: the vision and
# chat clients use the OpenAI key, the image generator uses the Stability key.
vision_api = GPT4VisionAPI(api_key=openai_api_key)
sd_api = StableDiffusion(api_key=stability_api_key)
gpt_api = OpenAIChat(openai_api_key=openai_api_key)
|
||||
|
||||
class Idea2Image(Agent):
    """Agent that iteratively refines an image-generation prompt.

    Each iteration generates an image from the current prompt, asks the
    vision model to analyse the result, and appends that analysis to the
    prompt used for the next iteration.
    """

    def __init__(self, llm, vision_api):
        """Store the collaborating models.

        Args:
            llm: Language model forwarded to ``Agent.__init__``.
                ``enrich_prompt`` later calls ``self.llm.generate``, so the
                base class is presumably what exposes it as ``self.llm``.
            vision_api: Object whose ``run`` method is called with the
                generated image and the current prompt.
        """
        super().__init__(llm=llm)
        self.vision_api = vision_api

    def run(self, initial_prompt, num_iterations, run_folder):
        """Run the generate -> analyse -> refine loop.

        Image generation goes through the module-level ``sd_api`` client.
        The loop stops early as soon as image generation returns a falsy
        value.

        Args:
            initial_prompt: Starting text for image generation.
            num_iterations: Maximum number of generate/analyse rounds.
            run_folder: Passed through unchanged to
                ``sd_api.generate_and_move_image``.

        Returns:
            None. Progress is reported with ``print``.
        """
        current_prompt = initial_prompt

        for i in range(num_iterations):
            print(f"Iteration {i}: Image generation and analysis")

            if i == 0:
                enriched = self.enrich_prompt(current_prompt)
                if enriched:
                    current_prompt = enriched
                    print(f"Enriched Prompt: {current_prompt}")
                else:
                    # enrich_prompt yields None when the LLM returns nothing.
                    # Keep the caller's prompt instead of sending None to the
                    # image generator (and failing on ``+=`` further down).
                    print("Prompt enrichment failed; using the original prompt")

            img = sd_api.generate_and_move_image(current_prompt, i, run_folder)
            if not img:
                print("Failed to generate image")
                break
            print(f"Generated image at: {img}")

            # NOTE(review): arguments are passed as (image, prompt); confirm
            # this matches the parameter order of the vision client's run().
            analysis = self.vision_api.run(img, current_prompt)
            if analysis:
                current_prompt += ". " + analysis[:500]  # Ensure the analysis is concise
                print(f"Image Analysis: {analysis}")
            else:
                print(f"Failed to analyze image at: {img}")

    def enrich_prompt(self, prompt):
        """Ask the LLM to rewrite ``prompt`` as a richer image prompt.

        Args:
            prompt: The starting prompt to improve.

        Returns:
            The text of the first generation, truncated to 500 characters,
            or None when the LLM result contains no generations.
        """
        enrichment_task = (
            "Create a concise and effective image generation prompt within 400 characters or less, "
            "based on Stable Diffusion and Dalle best practices. Starting prompt: \n\n'"
            f"{prompt}'\n\n"
            "Improve the prompt with any applicable details or keywords by considering the following aspects: \n"
            "1. Subject details (like actions, emotions, environment) \n"
            "2. Artistic style (such as surrealism, hyperrealism) \n"
            "3. Medium (digital painting, oil on canvas) \n"
            "4. Color themes and lighting (like warm colors, cinematic lighting) \n"
            "5. Composition and framing (close-up, wide-angle) \n"
            "6. Additional elements (like a specific type of background, weather conditions) \n"
            "7. Any other artistic or thematic details that can make the image more vivid and compelling."
        )
        llm_result = self.llm.generate([enrichment_task])

        generations = llm_result.generations
        # Treat an empty inner list like an empty outer one, so the caller
        # sees None rather than an IndexError.
        if not generations or not generations[0]:
            return None

        # The task asks for <= 400 characters; the slice is a hard cap in
        # case the model ignores that limit.
        return generations[0][0].text[:500]
|
||||
|
||||
# Collect the run parameters interactively.
user_prompt = input("Prompt for image generation: ")
num_iterations = int(
    input("Enter the number of iterations for image improvement: ")
)

# Timestamped directory for this run, created up front and handed to the agent.
run_folder = os.path.join("runs", f"{datetime.datetime.now():%Y%m%d_%H%M%S}")
os.makedirs(run_folder, exist_ok=True)

# Build the agent from the module-level model clients and start the loop.
idea2image_agent = Idea2Image(gpt_api, vision_api)
idea2image_agent.run(user_prompt, num_iterations, run_folder)

print("Image improvement process completed.")
|
@ -0,0 +1,14 @@
|
||||
# Instruction text asking a language model to enrich an image-generation
# prompt along seven listed aspects.
IMAGE_ENRICHMENT_PROMPT = (
    "Create a concise and effective image generation prompt within"
    " 400 characters or less, based on Stable Diffusion and Dalle"
    " best practices. Starting prompt: \n\n'"
    # NOTE(review): the starting prompt itself is not part of this constant.
    # An f-string piece that interpolated it (and closed the quote opened
    # above) is disabled here, so the text jumps straight from the opening
    # quote to the instructions below. Confirm how callers supply the prompt.
    "Improve the prompt with any applicable details or keywords by"
    " considering the following aspects: \n"
    "1. Subject details (like actions, emotions, environment) \n"
    "2. Artistic style (such as surrealism, hyperrealism) \n"
    "3. Medium (digital painting, oil on canvas) \n"
    "4. Color themes and lighting (like warm colors, cinematic"
    " lighting) \n"
    "5. Composition and framing (close-up, wide-angle) \n"
    "6. Additional elements (like a specific type of background,"
    " weather conditions) \n"
    "7. Any other artistic or thematic details that can make the"
    " image more vivid and compelling."
)
|
||||
|
Loading…
Reference in new issue