feat: add model-flow

pull/160/head
Zack 1 year ago
parent 7fa582d6cb
commit 38c025af9f

@@ -5,6 +5,7 @@ from langchain.schema import AgentFinish
import os
import requests
from swarms.modelui.server import create_interface
from tool_server import run_tool_server
from threading import Thread
from multiprocessing import Process
@@ -265,6 +266,9 @@ with gr.Blocks() as demo:
info="Choose the tools to solve your question.",
)
with gr.Tab("Models"):
create_interface()
key_set_btn.click(fn=set_environ, inputs=[ key_set_btn.click(fn=set_environ, inputs=[
OPENAI_API_KEY, OPENAI_API_KEY,
WOLFRAMALPH_APP_ID, WOLFRAMALPH_APP_ID,

Binary image file (223 KiB) not shown.

@@ -72,3 +72,91 @@ mkdocs
mkdocs-material
mkdocs-glightbox
ratelimit
accelerate==0.24.*
colorama
datasets
einops
exllamav2==0.0.8; platform_system != "Darwin" and platform_machine != "x86_64"
gradio==3.50.*
markdown
numpy==1.24.*
optimum==1.14.0
pandas
peft==0.6.*
Pillow>=9.5.0
pyyaml
requests
safetensors==0.4.0
scipy
sentencepiece
tensorboard
transformers==4.35.*
tqdm
wandb
git+https://github.com/oobabooga/torch-grammar.git
# bitsandbytes
bitsandbytes==0.41.1; platform_system != "Windows"
https://github.com/jllllll/bitsandbytes-windows-webui/releases/download/wheels/bitsandbytes-0.41.1-py3-none-win_amd64.whl; platform_system == "Windows"
# llama-cpp-python (CPU only, AVX2)
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp39-cp39-manylinux_2_17_x86_64.manylinux2014_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
https://github.com/abetlen/llama-cpp-python/releases/download/v0.2.11/llama_cpp_python-0.2.11-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
# CUDA wheels
https://github.com/jllllll/AutoGPTQ/releases/download/v0.5.1/auto_gptq-0.5.1+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/jllllll/AutoGPTQ/releases/download/v0.5.1/auto_gptq-0.5.1+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/jllllll/AutoGPTQ/releases/download/v0.5.1/auto_gptq-0.5.1+cu121-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
https://github.com/jllllll/AutoGPTQ/releases/download/v0.5.1/auto_gptq-0.5.1+cu121-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
https://github.com/jllllll/AutoGPTQ/releases/download/v0.5.1/auto_gptq-0.5.1+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/jllllll/AutoGPTQ/releases/download/v0.5.1/auto_gptq-0.5.1+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/jllllll/AutoGPTQ/releases/download/v0.5.1/auto_gptq-0.5.1+cu121-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/jllllll/AutoGPTQ/releases/download/v0.5.1/auto_gptq-0.5.1+cu121-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+cu121-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+cu121-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+cu121-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/jllllll/exllama/releases/download/0.0.18/exllama-0.0.18+cu121-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
https://github.com/turboderp/exllamav2/releases/download/v0.0.8/exllamav2-0.0.8+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/turboderp/exllamav2/releases/download/v0.0.8/exllamav2-0.0.8+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/turboderp/exllamav2/releases/download/v0.0.8/exllamav2-0.0.8+cu121-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
https://github.com/turboderp/exllamav2/releases/download/v0.0.8/exllamav2-0.0.8+cu121-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
https://github.com/turboderp/exllamav2/releases/download/v0.0.8/exllamav2-0.0.8+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp/exllamav2/releases/download/v0.0.8/exllamav2-0.0.8+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/turboderp/exllamav2/releases/download/v0.0.8/exllamav2-0.0.8+cu121-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/turboderp/exllamav2/releases/download/v0.0.8/exllamav2-0.0.8+cu121-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
https://github.com/bdashore3/flash-attention/releases/download/2.3.2-2/flash_attn-2.3.2+cu122-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/bdashore3/flash-attention/releases/download/2.3.2-2/flash_attn-2.3.2+cu122-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.2/flash_attn-2.3.2+cu122torch2.1cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.2/flash_attn-2.3.2+cu122torch2.1cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.2/flash_attn-2.3.2+cu122torch2.1cxx11abiFALSE-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.3.2/flash_attn-2.3.2+cu122torch2.1cxx11abiFALSE-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu121-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu121-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu121-cp311-cp311-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu121-cp310-cp310-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu121-cp39-cp39-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/jllllll/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.2.11+cu121-cp38-cp38-manylinux_2_31_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp39-cp39-win_amd64.whl; platform_system == "Windows" and python_version == "3.9"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp38-cp38-win_amd64.whl; platform_system == "Windows" and python_version == "3.8"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp39-cp39-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.9"
https://github.com/jllllll/GPTQ-for-LLaMa-CUDA/releases/download/0.1.1/gptq_for_llama-0.1.1+cu121-cp38-cp38-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.8"
https://github.com/jllllll/ctransformers-cuBLAS-wheels/releases/download/AVX2/ctransformers-0.2.27+cu121-py3-none-any.whl
autoawq==0.1.7; platform_system == "Linux" or platform_system == "Windows"

swarms/modelui/cache/cache_Example.png.png (Stored with Git LFS): binary file not shown.

@@ -0,0 +1,4 @@
name: AI
greeting: How can I help you today?
context: |
The following is a conversation with an AI Large Language Model. The AI has been trained to answer questions, provide recommendations, and help with decision making. The AI follows user requests. The AI thinks outside the box.

swarms/modelui/characters/Example.png (Stored with Git LFS): binary file not shown.

@@ -0,0 +1,17 @@
name: Chiharu Yamada
greeting: |-
*Chiharu strides into the room with a smile, her eyes lighting up when she sees you. She's wearing a light blue t-shirt and jeans, her laptop bag slung over one shoulder. She takes a seat next to you, her enthusiasm palpable in the air*
Hey! I'm so excited to finally meet you. I've heard so many great things about you and I'm eager to pick your brain about computers. I'm sure you have a wealth of knowledge that I can learn from. *She grins, eyes twinkling with excitement* Let's get started!
context: |-
Chiharu Yamada's Persona: Chiharu Yamada is a young, computer engineer-nerd with a knack for problem solving and a passion for technology.
{{user}}: So how did you get into computer engineering?
{{char}}: I've always loved tinkering with technology since I was a kid.
{{user}}: That's really impressive!
{{char}}: *She chuckles bashfully* Thanks!
{{user}}: So what do you do when you're not working on computers?
{{char}}: I love exploring, going out with friends, watching movies, and playing video games.
{{user}}: What's your favorite type of computer hardware to work with?
{{char}}: Motherboards, they're like puzzles and the backbone of any system.
{{user}}: That sounds great!
{{char}}: Yeah, it's really fun. I'm lucky to be able to do this as a job.

@@ -0,0 +1,38 @@
'''
Converts a transformers model to safetensors format and shards it.
This makes it faster to load (because of safetensors) and lowers its RAM usage
while loading (because of sharding).
Based on the original script by 81300:
https://gist.github.com/81300/fe5b08bff1cba45296a829b9d6b0f303
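Example usage (the script filename and model path here are illustrative, not from this commit):
python convert-to-safetensors.py models/opt-1.3b --max-shard-size 2GB --bf16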
'''
import argparse
from pathlib import Path
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
parser = argparse.ArgumentParser(formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=54))
parser.add_argument('MODEL', type=str, default=None, nargs='?', help="Path to the input model.")
parser.add_argument('--output', type=str, default=None, help='Path to the output folder (default: models/{model_name}_safetensors).')
parser.add_argument("--max-shard-size", type=str, default="2GB", help="Maximum size of a shard in GB or MB (default: %(default)s).")
parser.add_argument('--bf16', action='store_true', help='Load the model with bfloat16 precision. Requires NVIDIA Ampere GPU.')
args = parser.parse_args()
if __name__ == '__main__':
path = Path(args.MODEL)
model_name = path.name
print(f"Loading {model_name}...")
model = AutoModelForCausalLM.from_pretrained(path, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16 if args.bf16 else torch.float16)
tokenizer = AutoTokenizer.from_pretrained(path)
out_folder = args.output or Path(f"models/{model_name}_safetensors")
print(f"Saving the converted model to {out_folder} with a maximum shard size of {args.max_shard_size}...")
model.save_pretrained(out_folder, max_shard_size=args.max_shard_size, safe_serialization=True)
tokenizer.save_pretrained(out_folder)

@@ -0,0 +1,166 @@
/*
Copied from https://github.com/SillyTavern/SillyTavern/tree/6c8bd06308c69d51e2eb174541792a870a83d2d6/public/webfonts/NotoSans
*/
@font-face {
font-family: 'Noto Sans';
src: url('file/css/NotoSans/NotoSans-Black.woff2') format('woff2'),
url('file/css/NotoSans/NotoSans-Black.woff') format('woff');
font-weight: 900;
font-style: normal;
font-display: swap;
}
@font-face {
font-family: 'Noto Sans';
src: url('file/css/NotoSans/NotoSans-ExtraBoldItalic.woff2') format('woff2'),
url('file/css/NotoSans/NotoSans-ExtraBoldItalic.woff') format('woff');
font-weight: bold;
font-style: italic;
font-display: swap;
}
@font-face {
font-family: 'Noto Sans';
src: url('file/css/NotoSans/NotoSans-BlackItalic.woff2') format('woff2'),
url('file/css/NotoSans/NotoSans-BlackItalic.woff') format('woff');
font-weight: 900;
font-style: italic;
font-display: swap;
}
@font-face {
font-family: 'Noto Sans';
src: url('file/css/NotoSans/NotoSans-ExtraBold.woff2') format('woff2'),
url('file/css/NotoSans/NotoSans-ExtraBold.woff') format('woff');
font-weight: bold;
font-style: normal;
font-display: swap;
}
@font-face {
font-family: 'Noto Sans';
src: url('file/css/NotoSans/NotoSans-ThinItalic.woff2') format('woff2'),
url('file/css/NotoSans/NotoSans-ThinItalic.woff') format('woff');
font-weight: 100;
font-style: italic;
font-display: swap;
}
@font-face {
font-family: 'Noto Sans';
src: url('file/css/NotoSans/NotoSans-BoldItalic.woff2') format('woff2'),
url('file/css/NotoSans/NotoSans-BoldItalic.woff') format('woff');
font-weight: bold;
font-style: italic;
font-display: swap;
}
@font-face {
font-family: 'Noto Sans';
src: url('file/css/NotoSans/NotoSans-Bold.woff2') format('woff2'),
url('file/css/NotoSans/NotoSans-Bold.woff') format('woff');
font-weight: bold;
font-style: normal;
font-display: swap;
}
@font-face {
font-family: 'Noto Sans';
src: url('file/css/NotoSans/NotoSans-LightItalic.woff2') format('woff2'),
url('file/css/NotoSans/NotoSans-LightItalic.woff') format('woff');
font-weight: 300;
font-style: italic;
font-display: swap;
}
@font-face {
font-family: 'Noto Sans';
src: url('file/css/NotoSans/NotoSans-Italic.woff2') format('woff2'),
url('file/css/NotoSans/NotoSans-Italic.woff') format('woff');
font-weight: normal;
font-style: italic;
font-display: swap;
}
@font-face {
font-family: 'Noto Sans';
src: url('file/css/NotoSans/NotoSans-ExtraLightItalic.woff2') format('woff2'),
url('file/css/NotoSans/NotoSans-ExtraLightItalic.woff') format('woff');
font-weight: 200;
font-style: italic;
font-display: swap;
}
@font-face {
font-family: 'Noto Sans';
src: url('file/css/NotoSans/NotoSans-Light.woff2') format('woff2'),
url('file/css/NotoSans/NotoSans-Light.woff') format('woff');
font-weight: 300;
font-style: normal;
font-display: swap;
}
@font-face {
font-family: 'Noto Sans';
src: url('file/css/NotoSans/NotoSans-ExtraLight.woff2') format('woff2'),
url('file/css/NotoSans/NotoSans-ExtraLight.woff') format('woff');
font-weight: 200;
font-style: normal;
font-display: swap;
}
@font-face {
font-family: 'Noto Sans';
src: url('file/css/NotoSans/NotoSans-Medium.woff2') format('woff2'),
url('file/css/NotoSans/NotoSans-Medium.woff') format('woff');
font-weight: 500;
font-style: normal;
font-display: swap;
}
@font-face {
font-family: 'Noto Sans';
src: url('file/css/NotoSans/NotoSans-Regular.woff2') format('woff2'),
url('file/css/NotoSans/NotoSans-Regular.woff') format('woff');
font-weight: normal;
font-style: normal;
font-display: swap;
}
@font-face {
font-family: 'Noto Sans';
src: url('file/css/NotoSans/NotoSans-MediumItalic.woff2') format('woff2'),
url('file/css/NotoSans/NotoSans-MediumItalic.woff') format('woff');
font-weight: 500;
font-style: italic;
font-display: swap;
}
@font-face {
font-family: 'Noto Sans';
src: url('file/css/NotoSans/NotoSans-SemiBoldItalic.woff2') format('woff2'),
url('file/css/NotoSans/NotoSans-SemiBoldItalic.woff') format('woff');
font-weight: 600;
font-style: italic;
font-display: swap;
}
@font-face {
font-family: 'Noto Sans';
src: url('file/css/NotoSans/NotoSans-SemiBold.woff2') format('woff2'),
url('file/css/NotoSans/NotoSans-SemiBold.woff') format('woff');
font-weight: 600;
font-style: normal;
font-display: swap;
}
@font-face {
font-family: 'Noto Sans';
src: url('file/css/NotoSans/NotoSans-Thin.woff2') format('woff2'),
url('file/css/NotoSans/NotoSans-Thin.woff') format('woff');
font-weight: 100;
font-style: normal;
font-display: swap;
}

@@ -0,0 +1,133 @@
/* All credits to TheEncrypted777: https://www.reddit.com/r/Oobabooga/comments/12xe6vq/updated_css_styling_with_color_customization_for/ */
.message {
display: grid;
grid-template-columns: 60px minmax(0, 1fr);
padding-bottom: 28px;
font-size: 18px;
font-family: 'Noto Sans', Arial, sans-serif;
line-height: 1.428571429;
}
.circle-you,
.circle-bot {
background-color: gray;
border-radius: 1rem;
border: 2px solid white;
}
.circle-bot img,
.circle-you img {
border-radius: 10%;
width: 100%;
height: 100%;
object-fit: cover;
}
.circle-you, .circle-bot {
/* You can set the size of the profile images here, but if you do, you have to also adjust the .text{padding-left: 90px} to a different number according to the width of the image which is right below here */
width: 135px;
height: 175px;
}
.text {
/* Change this to move the message box further left or right depending on the size of your profile pic */
padding-left: 90px;
text-shadow: 2px 2px 2px rgb(0 0 0 / 40%);
}
.text p {
margin-top: 2px;
}
.username {
padding-left: 10px;
font-size: 22px;
font-weight: bold;
border-top: 1px solid rgb(51 64 90);
padding: 3px;
}
.message-body {
position: relative;
border: 1px solid rgb(255 255 255 / 45.9%);
border-radius: 10px;
padding: 10px;
padding-top: 5px;
/* Message gradient background color - remove the line below if you don't want a background color or gradient */
background: linear-gradient(to bottom, #171730, #1b263f);
}
/* Adds 2 extra lines at the top and bottom of the message */
.message-body::before,
.message-body::after {
content: "";
position: absolute;
left: 10px;
right: 10px;
height: 1px;
background-color: rgb(255 255 255 / 13%);
}
.message-body::before {
top: 6px;
}
.message-body::after {
bottom: 6px;
}
.message-body img {
max-width: 300px;
max-height: 300px;
border-radius: 20px;
}
.message-body p {
margin-bottom: 0 !important;
font-size: 18px !important;
line-height: 1.428571429 !important;
color: rgb(243 244 246) !important;
text-shadow: 2px 2px 2px rgb(0 0 0);
}
.message-body p em {
color: rgb(138 138 138) !important;
}
@media screen and (width <= 688px) {
.message {
display: grid;
grid-template-columns: 60px minmax(0, 1fr);
padding-bottom: 25px;
font-size: 15px;
font-family: 'Noto Sans', Helvetica, Arial, sans-serif;
line-height: 1.428571429;
}
.circle-you, .circle-bot {
width: 50px;
height: 73px;
border-radius: 0.5rem;
}
.circle-bot img,
.circle-you img {
width: 100%;
height: 100%;
object-fit: cover;
}
.text {
padding-left: 0;
}
.message-body p {
font-size: 16px !important;
}
.username {
font-size: 20px;
}
}

@@ -0,0 +1,21 @@
@import url("file/css/chat_style-cai-chat.css");
.circle-bot, .circle-you {
height: 90px;
width: 60px;
border-radius: 10px;
background-color: #656565;
}
.circle-bot img, .circle-you img {
border-radius: 8.333px;
}
.circle-you {
background-color: #656565;
}
.message {
padding-bottom: 30px;
grid-template-columns: 70px minmax(0, 1fr);
}

@@ -0,0 +1,66 @@
.message {
display: grid;
grid-template-columns: 60px minmax(0, 1fr);
padding-bottom: 25px;
font-size: 15px;
font-family: 'Noto Sans', Helvetica, Arial, sans-serif;
line-height: 22.5px !important;
}
.message-body {
margin-top: 3px;
}
.circle-you {
width: 50px;
height: 50px;
background-color: rgb(238 78 59);
border-radius: 50%;
}
.circle-bot {
width: 50px;
height: 50px;
background-color: rgb(59 78 244);
border-radius: 50%;
}
.circle-bot img,
.circle-you img {
border-radius: 50%;
width: 100%;
height: 100%;
object-fit: cover;
}
.username {
font-weight: bold;
}
.message-body img {
max-width: 300px;
max-height: 300px;
border-radius: 20px;
}
.message-body p {
font-size: 15px !important;
line-height: 22.5px !important;
}
.message-body p, .chat .message-body ul, .chat .message-body ol {
margin-bottom: 10px !important;
}
.message-body p:last-child, .chat .message-body ul:last-child, .chat .message-body ol:last-child {
margin-bottom: 0 !important;
}
.dark .message-body p em {
color: rgb(138 138 138) !important;
}
.message-body p em {
color: rgb(110 110 110) !important;
font-weight: 500;
}

@@ -0,0 +1,99 @@
.message {
padding-bottom: 25px;
font-size: 15px;
font-family: 'Noto Sans', Helvetica, Arial, sans-serif;
line-height: 1.428571429;
}
.circle-you {
width: 50px;
height: 50px;
background-color: rgb(238 78 59);
border-radius: 50%;
}
.circle-bot {
width: 50px;
height: 50px;
background-color: rgb(59 78 244);
border-radius: 50%;
float: left;
margin-right: 10px;
margin-top: 5px;
}
.circle-bot img,
.circle-you img {
border-radius: 50%;
width: 100%;
height: 100%;
object-fit: cover;
}
.circle-you {
margin-top: 5px;
float: right;
}
.circle-bot + .text, .circle-you + .text {
border-radius: 18px;
padding: 8px 12px;
}
.circle-bot + .text {
background-color: #E4E6EB;
float: left;
}
.circle-you + .text {
float: right;
background-color: rgb(0 132 255);
margin-right: 10px;
}
.circle-you + .text div, .circle-you + .text *, .dark .circle-you + .text div, .dark .circle-you + .text * {
color: #FFF !important;
}
.circle-you + .text .username {
text-align: right;
}
.dark .circle-bot + .text div, .dark .circle-bot + .text * {
color: #000;
}
.text {
max-width: 80%;
}
.text p {
margin-top: 5px;
}
.username {
font-weight: bold;
}
.message-body {
}
.message-body img {
max-width: 300px;
max-height: 300px;
border-radius: 20px;
}
.message-body p {
margin-bottom: 0 !important;
font-size: 15px !important;
line-height: 1.428571429 !important;
}
.dark .message-body p em {
color: rgb(138 138 138) !important;
}
.message-body p em {
color: rgb(110 110 110) !important;
}

@@ -0,0 +1,55 @@
.message {
padding-bottom: 25px;
font-size: 15px;
font-family: 'Noto Sans', Helvetica, Arial, sans-serif;
line-height: 1.428571429;
}
.text-you {
background-color: #d9fdd3;
border-radius: 15px;
padding: 10px;
padding-top: 5px;
float: right;
}
.text-bot {
background-color: #f2f2f2;
border-radius: 15px;
padding: 10px;
padding-top: 5px;
}
.dark .text-you {
background-color: #005c4b;
color: #111b21;
}
.dark .text-bot {
background-color: #1f2937;
color: #111b21;
}
.text-bot p, .text-you p {
margin-top: 5px;
}
.message-body img {
max-width: 300px;
max-height: 300px;
border-radius: 20px;
}
.message-body p {
margin-bottom: 0 !important;
font-size: 15px !important;
line-height: 1.428571429 !important;
}
.dark .message-body p em {
color: rgb(138 138 138) !important;
}
.message-body p em {
color: rgb(110 110 110) !important;
}

@@ -0,0 +1,73 @@
#parent #container {
background-color: #eef2ff;
padding: 17px;
}
#parent #container .reply {
background-color: rgb(214 218 240);
border-bottom: 1px solid rgb(183 197 217);
border-image: none 100% 1 0 stretch;
border-left: 0 none rgb(0 0 0);
border-right: 1px solid rgb(183 197 217);
color: rgb(0 0 0);
display: table;
font-family: arial, helvetica, sans-serif;
font-size: 13.3333px;
margin: 4px 0;
overflow: hidden hidden;
padding: 4px 2px;
}
#parent #container .number {
color: rgb(0 0 0);
font-family: arial, helvetica, sans-serif;
font-size: 13.3333px;
width: 342.65px;
margin-right: 7px;
}
#parent #container .op {
color: rgb(0 0 0);
font-family: arial, helvetica, sans-serif;
font-size: 13.3333px;
margin: 4px 0 8px;
overflow: hidden hidden;
}
#parent #container .op blockquote {
margin-left: 0 !important;
}
#parent #container .name {
color: rgb(17 119 67);
font-family: arial, helvetica, sans-serif;
font-size: 13.3333px;
font-weight: 700;
margin-left: 7px;
}
#parent #container .quote {
color: rgb(221 0 0);
font-family: arial, helvetica, sans-serif;
font-size: 13.3333px;
text-decoration: underline solid rgb(221 0 0);
text-decoration-thickness: auto;
}
#parent #container .greentext {
color: rgb(120 153 34);
font-family: arial, helvetica, sans-serif;
font-size: 13.3333px;
}
#parent #container blockquote {
margin: 0 !important;
margin-block: 1em 1em;
margin-inline: 40px 40px;
margin: 13.33px 40px !important;
}
#parent #container .message_4chan {
color: black;
border: none;
}

@@ -0,0 +1,67 @@
.message {
display: grid;
grid-template-columns: 60px 1fr;
padding-bottom: 25px;
font-size: 15px;
font-family: 'Noto Sans', Helvetica, Arial, sans-serif;
line-height: 22px;
}
.username {
display: none;
}
.message-body p, .message-body li {
font-size: 15px !important;
line-height: 22.5px !important;
}
.message-body p, .chat .message-body ul, .chat .message-body ol {
margin-bottom: 23.4375px !important;
}
.message-body p:last-child, .chat .message-body ul:last-child, .chat .message-body ol:last-child {
margin-bottom: 0 !important;
}
.dark .message-body p em {
color: rgb(198 202 214) !important;
}
.message-body p em {
color: rgb(110 110 110) !important;
}
.gradio-container .chat .assistant-message {
padding: 20px;
border-radius: 20px;
background-color: #0000000f;
margin-top: 9px !important;
margin-bottom: 18px !important;
}
.gradio-container .chat .user-message {
padding: 20px;
border-radius: 20px;
margin-bottom: 9px !important;
}
.gradio-container .chat .assistant-message:last-child, .gradio-container .chat .user-message:last-child {
margin-bottom: 0 !important;
}
.dark .chat .assistant-message {
background-color: #1f2937;
}
.dark .chat .user-message {
background-color: transparent;
}
code {
background-color: white !important;
}
.dark code {
background-color: #0e1321 !important;
}

@@ -0,0 +1,33 @@
.readable-container {
max-width: 600px;
margin-left: auto;
margin-right: auto;
background-color: rgb(31 41 55);
padding: 3em;
word-break: break-word;
overflow-wrap: anywhere;
color: #efefef !important;
}
.readable-container p, .readable-container li {
font-size: 16px !important;
color: #efefef !important;
margin-bottom: 22px;
line-height: 1.4 !important;
}
.readable-container li > p {
display: inline !important;
}
.readable-container code {
overflow-x: auto;
}
.readable-container :not(pre) > code {
white-space: normal !important;
}
.readable-container .hoverable {
font-size: 14px;
}

@@ -0,0 +1,650 @@
.tabs.svelte-710i53 {
margin-top: 0
}
.py-6 {
padding-top: 2.5rem
}
.small-button {
min-width: 0 !important;
max-width: 171px;
height: 39.594px;
align-self: end;
}
.refresh-button {
max-width: 4.4em;
min-width: 2.2em !important;
height: 39.594px;
align-self: end;
line-height: 1em;
border-radius: 0.5em;
flex: none;
}
.refresh-button-small {
max-width: 2.2em;
}
.button_nowrap {
white-space: nowrap;
}
#slim-column {
flex: none !important;
min-width: 0 !important;
}
.slim-dropdown {
background-color: transparent !important;
border: none !important;
padding: 0 !important;
}
#download-label, #upload-label {
min-height: 0
}
.dark svg {
fill: white;
}
.dark a {
color: white !important;
}
ol li p, ul li p {
display: inline-block;
}
#chat-tab, #default-tab, #notebook-tab, #parameters, #chat-settings, #lora, #training-tab, #model-tab, #session-tab {
border: 0;
}
.gradio-container-3-18-0 .prose * h1, h2, h3, h4 {
color: white;
}
.gradio-container {
max-width: 100% !important;
padding-top: 0 !important;
}
#extensions {
margin-top: 5px;
margin-bottom: 35px;
}
.extension-tab {
border: 0 !important;
}
span.math.inline {
font-size: 27px;
vertical-align: baseline !important;
}
div.svelte-15lo0d8 > *, div.svelte-15lo0d8 > .form > * {
flex-wrap: nowrap;
}
.header_bar {
background-color: #f7f7f7;
margin-bottom: 19px;
overflow-x: scroll;
margin-left: calc(-1 * var(--size-4));
margin-right: calc(-1 * var(--size-4));
display: block !important;
text-wrap: nowrap;
}
.dark .header_bar {
border: none !important;
background-color: #8080802b;
}
.header_bar button.selected {
border-radius: 0;
}
.textbox_default textarea {
height: calc(100dvh - 271px);
}
.textbox_default_output textarea {
height: calc(100dvh - 185px);
}
.textbox textarea {
height: calc(100dvh - 241px);
}
.textbox_logits textarea {
height: calc(100dvh - 236px);
}
.textbox_logits_notebook textarea {
height: calc(100dvh - 292px);
}
.monospace textarea {
font-family: monospace;
}
.textbox_default textarea,
.textbox_default_output textarea,
.textbox_logits textarea,
.textbox_logits_notebook textarea,
.textbox textarea {
font-size: 16px !important;
color: #46464A !important;
}
.dark textarea {
color: #efefef !important;
}
@media screen and (width <= 711px) {
.textbox_default textarea {
height: calc(100dvh - 259px);
}
div .default-token-counter {
top: calc( 0.5 * (100dvh - 236px) ) !important;
}
.transparent-substring {
display: none;
}
.hover-menu {
min-width: 250px !important;
}
}
/* Hide the gradio footer */
footer {
display: none !important;
}
button {
font-size: 14px !important;
}
.file-saver {
position: fixed !important;
height: 100%;
z-index: 1000;
background-color: rgb(0 0 0 / 50%) !important;
margin-left: -20px;
margin-right: -20px;
}
.file-saver > :first-child {
position: fixed !important;
top: 50%;
left: 50%;
transform: translate(-50%, -50%); /* center horizontally */
width: 100%;
max-width: 500px;
background-color: var(--input-background-fill);
border: var(--input-border-width) solid var(--input-border-color) !important;
}
.file-saver > :first-child > :nth-child(2) {
background: var(--block-background-fill);
}
.checkboxgroup-table label {
background: none !important;
padding: 0 !important;
border: 0 !important;
}
.checkboxgroup-table div {
display: grid !important;
}
.markdown ul ol {
font-size: 100% !important;
}
.pretty_scrollbar::-webkit-scrollbar {
width: 5px;
}
.pretty_scrollbar::-webkit-scrollbar-track {
background: transparent;
}
.pretty_scrollbar::-webkit-scrollbar-thumb,
.pretty_scrollbar::-webkit-scrollbar-thumb:hover {
background: #c5c5d2;
}
.dark .pretty_scrollbar::-webkit-scrollbar-thumb,
.dark .pretty_scrollbar::-webkit-scrollbar-thumb:hover {
background: #374151;
}
.pretty_scrollbar::-webkit-resizer {
background: #c5c5d2;
}
.dark .pretty_scrollbar::-webkit-resizer {
background: #374151;
}
audio {
max-width: 100%;
}
/* Copied from https://github.com/AUTOMATIC1111/stable-diffusion-webui */
.token-counter {
position: absolute !important;
top: calc( 0.5 * (100dvh - 218px) ) !important;
right: 2px;
z-index: 100;
background: var(--input-background-fill) !important;
min-height: 0 !important;
}
.default-token-counter {
top: calc( 0.5 * (100dvh - 248px) ) !important;
}
.token-counter span {
padding: 1px;
box-shadow: 0 0 0 0.3em rgb(192 192 192 / 15%), inset 0 0 0.6em rgb(192 192 192 / 7.5%);
border: 2px solid rgb(192 192 192 / 40%) !important;
border-radius: 0.4em;
}
.no-background {
background: var(--background-fill-primary) !important;
padding: 0 !important;
}
/* ----------------------------------------------
Chat tab
---------------------------------------------- */
.h-\[40vh\], .wrap.svelte-byatnx.svelte-byatnx.svelte-byatnx {
height: 66.67vh
}
.gradio-container {
margin-left: auto !important;
margin-right: auto !important;
}
.w-screen {
width: unset
}
div.svelte-362y77>*, div.svelte-362y77>.form>* {
flex-wrap: nowrap
}
.pending.svelte-1ed2p3z {
opacity: 1;
}
.wrap.svelte-6roggh.svelte-6roggh {
max-height: 92.5%;
}
/* This is for the microphone button in the whisper extension */
.sm.svelte-1ipelgc {
width: 100%;
}
#chat-tab button#Generate, #chat-tab button#stop {
width: 89.3438px !important;
}
#chat-tab button, #notebook-tab button, #default-tab button {
min-width: 0 !important;
}
#chat-tab > :first-child, #extensions {
max-width: 880px;
margin-left: auto;
margin-right: auto;
}
@media screen and (width <= 688px) {
#chat-tab {
padding-left: 0;
padding-right: 0;
}
.chat-parent {
height: calc(100dvh - 179px) !important;
}
.old-ui .chat-parent {
height: calc(100dvh - 310px) !important;
}
}
.chat {
margin-left: auto;
margin-right: auto;
max-width: 880px;
height: 100%;
overflow-y: auto;
padding-right: 15px;
display: flex;
flex-direction: column;
word-break: break-word;
overflow-wrap: anywhere;
}
.chat-parent {
height: calc(100dvh - 181px);
overflow: auto !important;
}
.old-ui .chat-parent {
height: calc(100dvh - 270px);
}
.chat-parent.bigchat {
height: calc(100dvh - 181px) !important;
}
.chat > .messages {
display: flex;
flex-direction: column;
}
.chat .message:last-child {
margin-bottom: 0 !important;
padding-bottom: 0 !important;
}
.message-body li:not(:last-child) {
margin-top: 0 !important;
margin-bottom: 2px !important;
}
.message-body li:last-child {
margin-bottom: 0 !important;
}
.message-body li > p {
display: inline !important;
}
.message-body ul, .message-body ol {
font-size: 15px !important;
}
.message-body ul {
list-style-type: disc !important;
}
.message-body pre:not(:last-child) {
margin-bottom: 35.625px !important;
}
.message-body pre:last-child {
margin-bottom: 0 !important;
}
.message-body code {
white-space: pre-wrap !important;
word-wrap: break-word !important;
border: 1px solid var(--border-color-primary);
border-radius: var(--radius-sm);
background: var(--background-fill-secondary);
font-size: 90%;
padding: 1px 3px;
}
.message-body pre > code {
display: block;
padding: 15px;
}
.message-body :not(pre) > code {
white-space: normal !important;
}
#chat-input {
padding: 0;
padding-top: 18px;
background: transparent;
border: none;
}
#chat-input textarea:focus {
box-shadow: none !important;
}
#chat-input > :first-child {
background-color: transparent;
}
#chat-input .progress-text {
display: none;
}
@media print {
body {
visibility: hidden;
}
.chat {
visibility: visible;
position: absolute;
left: 0;
top: 0;
max-width: unset;
max-height: unset;
width: 100%;
overflow-y: visible;
}
.message {
break-inside: avoid;
}
.gradio-container {
overflow: visible;
}
.tab-nav {
display: none !important;
}
#chat-tab > :first-child {
max-width: unset;
}
}
#show-controls {
position: absolute;
height: 100%;
background-color: var(--background-fill-primary);
border: 0 !important;
border-radius: 0;
}
#show-controls label {
z-index: 1000;
position: absolute;
left: calc(100% - 168px);
}
#typing-container {
display: none;
position: absolute;
background-color: transparent;
left: -2px;
padding: var(--block-padding);
}
.typing {
position: relative;
}
.visible-dots #typing-container {
display: block;
}
.typing span {
content: '';
animation: blink 1.5s infinite;
animation-fill-mode: both;
height: 10px;
width: 10px;
background: #3b5998;
position: absolute;
left: 0;
top: 0;
border-radius: 50%;
}
.typing .dot1 {
animation-delay: .2s;
margin-left: calc(10px * 1.5);
}
.typing .dot2 {
animation-delay: .4s;
margin-left: calc(10px * 3);
}
@keyframes blink {
0% {
opacity: .1;
}
20% {
opacity: 1;
}
100% {
opacity: .1;
}
}
#chat-tab .generating {
display: none !important;
}
.hover-element {
position: relative;
font-size: 24px;
}
.hover-menu {
display: none;
position: absolute;
bottom: 80%;
left: 0;
background-color: var(--background-fill-secondary);
box-shadow: 0 0 10px rgb(0 0 0 / 50%);
z-index: 10000;
min-width: 330px;
flex-direction: column;
}
.hover-menu button {
width: 100%;
background: transparent !important;
border-radius: 0 !important;
justify-content: space-between;
margin: 0 !important;
height: 36px;
}
.hover-menu button:not(#clear-history-confirm) {
border-bottom: 0 !important;
}
.hover-menu button:not(#clear-history-confirm):last-child {
border-bottom: var(--button-border-width) solid var(--button-secondary-border-color) !important;
}
.hover-menu button:hover {
background: var(--button-secondary-background-fill-hover) !important;
}
.transparent-substring {
opacity: 0.333;
}
#chat-tab:not(.old-ui) #chat-buttons {
display: none !important;
}
#gr-hover-container {
min-width: 0 !important;
display: flex;
flex-direction: column-reverse;
padding-right: 20px;
padding-bottom: 3px;
flex-grow: 0 !important;
}
#generate-stop-container {
min-width: 0 !important;
display: flex;
flex-direction: column-reverse;
padding-bottom: 3px;
flex: 0 auto !important;
}
#chat-input-container {
min-width: 0 !important;
}
#chat-input-container > .form {
background: transparent;
border: none;
}
#chat-input-row {
padding-bottom: 20px;
}
.old-ui #chat-input-row, #chat-input-row.bigchat {
padding-bottom: 0 !important;
}
#chat-col {
padding-bottom: 115px;
}
.old-ui #chat-col, #chat-col.bigchat {
padding-bottom: 95px !important;
}
.old-ui #chat-buttons #clear-history-confirm {
order: -1;
}
.chat ol, .chat ul {
margin-top: 6px !important;
}
/* ----------------------------------------------
Past chats menus
---------------------------------------------- */
#past-chats-row {
margin-bottom: calc( -1 * var(--layout-gap) );
}
#rename-row label {
margin-top: var(--layout-gap);
}
/* ----------------------------------------------
Keep dropdown menus above errored components
---------------------------------------------- */
.options {
z-index: 100 !important;
}

@@ -0,0 +1,292 @@
'''
Downloads models from Hugging Face to models/username_modelname.
Example:
python download-model.py facebook/opt-1.3b
'''
import argparse
import base64
import datetime
import hashlib
import json
import os
import re
import sys
from pathlib import Path
import requests
import tqdm
from requests.adapters import HTTPAdapter
from tqdm.contrib.concurrent import thread_map
base = "https://huggingface.co"
class ModelDownloader:
def __init__(self, max_retries=5):
self.session = requests.Session()
if max_retries:
self.session.mount('https://cdn-lfs.huggingface.co', HTTPAdapter(max_retries=max_retries))
self.session.mount('https://huggingface.co', HTTPAdapter(max_retries=max_retries))
if os.getenv('HF_USER') is not None and os.getenv('HF_PASS') is not None:
self.session.auth = (os.getenv('HF_USER'), os.getenv('HF_PASS'))
if os.getenv('HF_TOKEN') is not None:
self.session.headers = {'authorization': f'Bearer {os.getenv("HF_TOKEN")}'}
def sanitize_model_and_branch_names(self, model, branch):
if model[-1] == '/':
model = model[:-1]
if model.startswith(base + '/'):
model = model[len(base) + 1:]
model_parts = model.split(":")
model = model_parts[0] if len(model_parts) > 0 else model
branch = model_parts[1] if len(model_parts) > 1 else branch
if branch is None:
branch = "main"
else:
pattern = re.compile(r"^[a-zA-Z0-9._-]+$")
if not pattern.match(branch):
raise ValueError(
"Invalid branch name. Only alphanumeric characters, period, underscore and dash are allowed.")
return model, branch
def get_download_links_from_huggingface(self, model, branch, text_only=False, specific_file=None):
page = f"/api/models/{model}/tree/{branch}"
cursor = b""
links = []
sha256 = []
classifications = []
has_pytorch = False
has_pt = False
has_gguf = False
has_safetensors = False
is_lora = False
while True:
url = f"{base}{page}" + (f"?cursor={cursor.decode()}" if cursor else "")
r = self.session.get(url, timeout=10)
r.raise_for_status()
content = r.content
dict = json.loads(content)
if len(dict) == 0:
break
for i in range(len(dict)):
fname = dict[i]['path']
if specific_file not in [None, ''] and fname != specific_file:
continue
if not is_lora and fname.endswith(('adapter_config.json', 'adapter_model.bin')):
is_lora = True
is_pytorch = re.match(r"(pytorch|adapter|gptq)_model.*\.bin", fname)
is_safetensors = re.match(r".*\.safetensors", fname)
is_pt = re.match(r".*\.pt", fname)
is_gguf = re.match(r'.*\.gguf', fname)
is_tiktoken = re.match(r".*\.tiktoken", fname)
is_tokenizer = re.match(r"(tokenizer|ice|spiece).*\.model", fname) or is_tiktoken
is_text = re.match(r".*\.(txt|json|py|md)", fname) or is_tokenizer
if any((is_pytorch, is_safetensors, is_pt, is_gguf, is_tokenizer, is_text)):
if 'lfs' in dict[i]:
sha256.append([fname, dict[i]['lfs']['oid']])
if is_text:
links.append(f"https://huggingface.co/{model}/resolve/{branch}/{fname}")
classifications.append('text')
continue
if not text_only:
links.append(f"https://huggingface.co/{model}/resolve/{branch}/{fname}")
if is_safetensors:
has_safetensors = True
classifications.append('safetensors')
elif is_pytorch:
has_pytorch = True
classifications.append('pytorch')
elif is_pt:
has_pt = True
classifications.append('pt')
elif is_gguf:
has_gguf = True
classifications.append('gguf')
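# Build the next-page cursor for the HF tree API: JSON with the last file name,
# base64-encoded, suffixed with b':50', base64-encoded again, with '=' escaped as %3D.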
cursor = base64.b64encode(f'{{"file_name":"{dict[-1]["path"]}"}}'.encode()) + b':50'
cursor = base64.b64encode(cursor)
cursor = cursor.replace(b'=', b'%3D')
# If both pytorch and safetensors are available, download safetensors only
if (has_pytorch or has_pt) and has_safetensors:
for i in range(len(classifications) - 1, -1, -1):
if classifications[i] in ['pytorch', 'pt']:
links.pop(i)
if has_gguf and specific_file is None:
for i in range(len(classifications) - 1, -1, -1):
if 'q4_k_m' not in links[i].lower():
links.pop(i)
is_llamacpp = has_gguf and specific_file is not None
return links, sha256, is_lora, is_llamacpp
def get_output_folder(self, model, branch, is_lora, is_llamacpp=False, base_folder=None):
if base_folder is None:
base_folder = 'models' if not is_lora else 'loras'
# If the model is of type GGUF, save directly in the base_folder
if is_llamacpp:
return Path(base_folder)
output_folder = f"{'_'.join(model.split('/')[-2:])}"
if branch != 'main':
output_folder += f'_{branch}'
output_folder = Path(base_folder) / output_folder
return output_folder
def get_single_file(self, url, output_folder, start_from_scratch=False):
filename = Path(url.rsplit('/', 1)[1])
output_path = output_folder / filename
headers = {}
mode = 'wb'
if output_path.exists() and not start_from_scratch:
# Check if the file has already been downloaded completely
r = self.session.get(url, stream=True, timeout=10)
total_size = int(r.headers.get('content-length', 0))
if output_path.stat().st_size >= total_size:
return
# Otherwise, resume the download from where it left off
headers = {'Range': f'bytes={output_path.stat().st_size}-'}
mode = 'ab'
with self.session.get(url, stream=True, headers=headers, timeout=10) as r:
r.raise_for_status() # Do not continue the download if the request was unsuccessful
total_size = int(r.headers.get('content-length', 0))
block_size = 1024 * 1024 # 1MB
tqdm_kwargs = {
'total': total_size,
'unit': 'iB',
'unit_scale': True,
'bar_format': '{l_bar}{bar}| {n_fmt:6}/{total_fmt:6} {rate_fmt:6}'
}
if 'COLAB_GPU' in os.environ:
tqdm_kwargs.update({
'position': 0,
'leave': True
})
with open(output_path, mode) as f:
with tqdm.tqdm(**tqdm_kwargs) as t:
count = 0
for data in r.iter_content(block_size):
t.update(len(data))
f.write(data)
if total_size != 0 and self.progress_bar is not None:
count += len(data)
self.progress_bar(float(count) / float(total_size), f"{filename}")
def start_download_threads(self, file_list, output_folder, start_from_scratch=False, threads=4):
thread_map(lambda url: self.get_single_file(url, output_folder, start_from_scratch=start_from_scratch), file_list, max_workers=threads, disable=True)
def download_model_files(self, model, branch, links, sha256, output_folder, progress_bar=None, start_from_scratch=False, threads=4, specific_file=None, is_llamacpp=False):
self.progress_bar = progress_bar
# Create the folder and write the metadata
output_folder.mkdir(parents=True, exist_ok=True)
if not is_llamacpp:
metadata = f'url: https://huggingface.co/{model}\n' \
f'branch: {branch}\n' \
f'download date: {datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")}\n'
sha256_str = '\n'.join([f' {item[1]} {item[0]}' for item in sha256])
if sha256_str:
metadata += f'sha256sum:\n{sha256_str}'
metadata += '\n'
(output_folder / 'huggingface-metadata.txt').write_text(metadata)
if specific_file:
print(f"Downloading {specific_file} to {output_folder}")
else:
print(f"Downloading the model to {output_folder}")
self.start_download_threads(links, output_folder, start_from_scratch=start_from_scratch, threads=threads)
def check_model_files(self, model, branch, links, sha256, output_folder):
# Validate the checksums
validated = True
for i in range(len(sha256)):
fpath = (output_folder / sha256[i][0])
if not fpath.exists():
print(f"The following file is missing: {fpath}")
validated = False
continue
with open(output_folder / sha256[i][0], "rb") as f:
file_hash = hashlib.file_digest(f, "sha256").hexdigest()
if file_hash != sha256[i][1]:
print(f'Checksum failed: {sha256[i][0]} {sha256[i][1]}')
validated = False
else:
print(f'Checksum validated: {sha256[i][0]} {sha256[i][1]}')
if validated:
print('[+] Validated checksums of all model files!')
else:
print('[-] Invalid checksums. Rerun download-model.py with the --clean flag.')
if __name__ == '__main__':
parser = argparse.ArgumentParser()
parser.add_argument('MODEL', type=str, default=None, nargs='?')
parser.add_argument('--branch', type=str, default='main', help='Name of the Git branch to download from.')
parser.add_argument('--threads', type=int, default=4, help='Number of files to download simultaneously.')
parser.add_argument('--text-only', action='store_true', help='Only download text files (txt/json).')
parser.add_argument('--specific-file', type=str, default=None, help='Name of the specific file to download (if not provided, downloads all).')
parser.add_argument('--output', type=str, default=None, help='The folder where the model should be saved.')
parser.add_argument('--clean', action='store_true', help='Does not resume the previous download.')
parser.add_argument('--check', action='store_true', help='Validates the checksums of model files.')
parser.add_argument('--max-retries', type=int, default=5, help='Max retry count when an error occurs during download.')
args = parser.parse_args()
branch = args.branch
model = args.MODEL
specific_file = args.specific_file
if model is None:
print("Error: Please specify the model you'd like to download (e.g. 'python download-model.py facebook/opt-1.3b').")
sys.exit()
downloader = ModelDownloader(max_retries=args.max_retries)
# Clean up the model/branch names
try:
model, branch = downloader.sanitize_model_and_branch_names(model, branch)
except ValueError as err_branch:
print(f"Error: {err_branch}")
sys.exit()
# Get the download links from Hugging Face
links, sha256, is_lora, is_llamacpp = downloader.get_download_links_from_huggingface(model, branch, text_only=args.text_only, specific_file=specific_file)
# Get the output folder
output_folder = downloader.get_output_folder(model, branch, is_lora, is_llamacpp=is_llamacpp, base_folder=args.output)
if args.check:
# Check previously downloaded files
downloader.check_model_files(model, branch, links, sha256, output_folder)
else:
# Download files
downloader.download_model_files(model, branch, links, sha256, output_folder, specific_file=specific_file, threads=args.threads, is_llamacpp=is_llamacpp)

@@ -0,0 +1,92 @@
# Training_PRO
This is an expanded and reworked Training tab
Maintained by FP
[![ko-fi](https://ko-fi.com/img/githubbutton_sm.svg)](https://ko-fi.com/Q5Q5MOB4M)
Repo home:
https://github.com/FartyPants/Training_PRO
In general, the repo above is ahead of the extension included in the text WebUI.
## News
- NEFTune: adds noise to help with generalization (see the sketch after this list)
- Loss Graph in interface.
- Supports Mistral training
- a workaround for the PyTorch / Transformers version desync
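A rough sketch of the NEFTune idea mentioned above - per the original NEFTune paper, not this extension's exact code: during training, add uniform noise scaled by alpha / sqrt(seq_len * dim) to the embedding output.

```python
import torch

def neftune(embeds: torch.Tensor, alpha: float = 5.0) -> torch.Tensor:
    # embeds: (batch, seq_len, dim) output of the embedding layer
    _, seq_len, dim = embeds.shape
    scale = alpha / (seq_len * dim) ** 0.5
    return embeds + torch.empty_like(embeds).uniform_(-1, 1) * scale
```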
![image](https://github.com/FartyPants/Training_PRO/assets/23346289/e389ec69-d7ad-4922-9ad9-865625997479)
## Features/Changes
- Chunking: precise raw text slicer (PRTS) uses sentence slicing and makes sure things are clean on all ends
- overlap chunking - this special overlapping will make additional overlap blocks based on logical rules (i.e., no overlap block across a hard cut)
- custom scheduler (follow the code to make your own): in LR Scheduler select FP_low_epoch_annealing - this scheduler will keep the LR constant for the first epoch, then use cosine annealing for the rest (this part would be best spun off into a new .py file)
- saves graph png file at the end with learning rate and loss per epoch
- adding EOS to each block or to hard cut only
- automatically lowers gradient accumulation if you go overboard and set it higher than the actual data allows - transformers would then throw an error (or they used to; not sure if that's still true), but in any case it will fix the bad setting
- turn BOS on and off
- target selector
- DEMENTOR LEARNING (experimental) Deep Memorization Enforcement Through Overlapping and Repetition. This is an experiment for long-text learning using low epochs (basically use 1 epoch with constant LR or 2 epochs with FP_low_epoch_annealing LR scheduler)
- Getting rid of micro batch size/batch size confusion. Now there is a True Batch Size slider and a Gradient Accumulation slider, consistent with all the other training tools out there
- Ability to save Checkpoint during training with a button
- Ability to change Stop Loss during training
- different modes of checkpoint auto saving
- Function to Check Dataset and suggest parameters such as warmup and checkpoint save frequency before training
- Graph Training Loss in interface
- more custom schedulers
### Notes:
This uses its own chunking code for raw text, based on sentence splitting. This avoids weird cuts in the chunks: each chunk should now start with a sentence and end on a sentence boundary. It works hand in hand with Hard Cut. The proper use is to structure your text into logical blocks (ideas) separated by three \n, then use three \n in Hard Cut. This way each chunk will contain only one flow of ideas and not derail into other thoughts. The overlapping code will create overlapped blocks on a sentence basis too, but never across a hard cut, and thus never across different ideas either. Does it make any sense? No? Hmmmm...
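A hypothetical sketch of that idea (not the extension's actual PRTS code): split on the hard cut first, then pack whole sentences into chunks, so that no chunk ever crosses a hard cut.

```python
import re

def chunk_text(text: str, hard_cut: str = "\n\n\n", max_chars: int = 512):
    # Pack whole sentences into chunks; never chunk across a hard cut.
    chunks = []
    for block in text.split(hard_cut):
        sentences = re.split(r"(?<=[.!?])\s+", block.strip())
        current = ""
        for s in sentences:
            if current and len(current) + len(s) + 1 > max_chars:
                chunks.append(current)
                current = s
            else:
                current = f"{current} {s}".strip()
        if current:
            chunks.append(current)
    return chunks
```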
### Custom schedulers
A bunch of custom (combination) schedulers are added to the LR schedule. These are based on my own experiments
**FP_low_epoch_annealing**
Uses a constant LR (with warmup) for the first epoch only. The rest of the epoch(s) is cosine annealing. So with 10 epochs, 1 will be constant and 9 will nose-dive down. However, the typical usage would be 2 epochs (hence "low epoch" in the name): the first is constant, the second is annealing. Simple. I use it 90% of the time.
**FP_half_time_annealing**
Like the low-epoch version, but now the total number of steps is divided by 2: the first half is constant, the second half is annealing. So with 10 epochs, 5 will be constant and 5 will cosine nose-dive down.
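Reading from the scheduler code shipped in this commit, the half-time curve is approximately (with s the current step, W the warmup steps, S the total steps):

$$
\frac{\mathrm{lr}(s)}{\mathrm{lr}_{\max}} =
\begin{cases}
s/W & s < W \\
1 & W \le s < S/2 \\
\frac{1}{2}\left(1 + \cos\left(\pi \cdot \frac{s - S/2}{S - S/2}\right)\right) & s \ge S/2
\end{cases}
$$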
**FP_raise_fall_creative**
This is a sine rise until half of the total steps, then a cosine fall for the rest. (Or you may think of the curve as a sine in its entirety: most of the learning is done in the hump, in the middle.) The warmup entry has no effect, since the sine rise is automatically a warmup.
The idea is to start very mildly so as not to overfit on the first blocks of the dataset. It seems to broaden the scope of the model, making it less strict on a tight dataset.
### Targets
Normal LoRA targets q and v, and that's what you should use. You can use (q k v o) or (q k v) and it will give you a lot more trainable parameters. The benefit is that you can keep the rank lower and still attain the same coherency as q v with a high rank. Guanaco, for example, was trained with QLoRA on q k v o, and they swear by it.
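For illustration, here is roughly what those target choices look like as peft LoRA configs (the module names assume a LLaMA-style model; the rank/alpha values are arbitrary, not this extension's defaults):

```python
from peft import LoraConfig

# Standard targets: the attention q and v projections.
qv_config = LoraConfig(
    r=16, lora_alpha=32, lora_dropout=0.05,
    target_modules=["q_proj", "v_proj"],
    task_type="CAUSAL_LM",
)

# More trainable parameters: all four attention projections,
# which lets you keep the rank lower for the same coherency.
qkvo_config = LoraConfig(
    r=8, lora_alpha=16, lora_dropout=0.05,
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj"],
    task_type="CAUSAL_LM",
)
```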
### DEMENTOR LEARNING (experimental) Deep Memorization Enforcement Through Overlapping and Repetition
This is an experimental chunking scheme to train long-form text in a low number of epochs (basically 1) with sliding repetition. The depth of learning depends directly on the cutoff_length. Increasing the cutoff length also increases the number of blocks created from long-form text (which is contrary to normal training). It is based on my own wild experiments.
### Getting rid of batch size and micro batch size
Keeping consistency with everyone else.
Listen, there is only ONE batch size - the True Batch Size (previously called micro-batch size in the WebUI) - this is how many blocks are processed at once (during a single step). It eats GPU, but it really helps with training quality (in fact the ideal batch size would be the same as the number of blocks - which is unrealistic) - so the idea is to cram in as much True Batch Size as you can before your GPU blows up with OOM. On 24 GB this is about 10 for 13B (loaded with 4-bit).
So no micro batch size - it is now called True Batch Size, because that's what it is.
The other thing is Gradient Accumulation - this is an emulation of the above batch size - a virtual batch size, if you will. If your GPU can't handle the real batch size, then you may fake it using Gradient Accumulation. This accumulates the gradients over however many steps are defined here and then updates the weights at the end, without any increase in GPU memory.
Gradient Accumulation is like a virtual batch size multiplier without the GPU penalty.
If your batch size is 4 and your gradient accumulation is 2, then it sort of behaves as if the batch size were 8. *Sort of*, because batch size 4 with GA 2 is NOT the same as batch size 2 with GA 4 (they produce different weights - hence not equivalent). The idea is that if you don't have the GPU memory, using GA to extend the batch size is the next best thing (good enough), since you have no other choice.
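A minimal sketch of what gradient accumulation does in a plain PyTorch loop (toy model and data; not the WebUI's actual trainer):

```python
import torch
from torch import nn

model = nn.Linear(8, 1)
opt = torch.optim.SGD(model.parameters(), lr=0.1)
loader = [(torch.randn(4, 8), torch.randn(4, 1)) for _ in range(8)]  # toy batches of size 4
accum_steps = 2  # virtual multiplier: behaves *roughly* like batch size 4 * 2

opt.zero_grad()
for step, (x, y) in enumerate(loader):
    loss = nn.functional.mse_loss(model(x), y)
    (loss / accum_steps).backward()   # scale so the accumulated gradients average out
    if (step + 1) % accum_steps == 0:
        opt.step()                    # weights update only once per virtual batch
        opt.zero_grad()
```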
If all you can afford is 1 batch size, then increasing GA will likely make the learning better in some range of GA (it's not always more is better).
However, GA is not some golden goose. As said, it isn't the same as batch size. In fact, GA may worsen your learning as well.
I would suggest a series of experiments where you set the batch size as high as possible without OOM and GA to 1, then repeat the training while increasing GA (2, 4...) and see how the model changes. It will likely follow some sort of curve where GA seems to help before it starts making things worse. Some people believe that if you can squeeze in a batch size of 6, you shouldn't bother with GA at all... YMMV.
High Batch Size vs High GA would also likely produce different results in terms of learning words vs style. How? Hmmmm... good question.
One cosmetic "benefit" of GA is that the loss will fluctuate less (because of all the gradient accumulation, which also works as a form of noise smoothing).

@ -0,0 +1,433 @@
from functools import partial
import torch
import transformers
import math
from torch.optim.lr_scheduler import LambdaLR
from peft import (
PeftModel,
)
RED = "\033[91m"
YELLOW = "\033[93m"
GREEN = "\033[92m"
RESET = "\033[0m"
last_print_label = ''
custom_scheduler_params = {'trigger_loss': 0.0, 'ramp_down_ratio':1.0, 'current_loss': 0.0,'dynamic_scheduler_stop': False, 'calc_ramp_down_at_step': 0, 'calc_num_training_steps': 0}
def custom_scheduler_global_update(current_loss: float):
custom_scheduler_params.update({'current_loss': current_loss})
def custom_scheduler_global_setup(trigger_loss: float, ramp_down_ratio: float):
custom_scheduler_params.update({'trigger_loss': trigger_loss})
custom_scheduler_params.update({'ramp_down_ratio': ramp_down_ratio})
# calculates the total num steps after trigger
custom_scheduler_params.update({'calc_num_training_steps': 0})
#calculates steps when the ramp_down trigger occured
custom_scheduler_params.update({'calc_ramp_down_at_step': 0})
# triggers scheduler stopping after it reached calc_num_training_steps
custom_scheduler_params.update({'dynamic_scheduler_stop': False})
# hold constant to the half of epochs then cosine down to 0
def _get_fp_half_schedule_with_warmup_lr_lambda(current_step: int, *, num_warmup_steps: int, num_training_steps: int, num_firstepoch_steps: int):
global last_print_label
print_label = ''
half_steps = num_training_steps//2
num_warmup_steps = min(num_warmup_steps,half_steps)
if current_step < num_warmup_steps:
print_label = 'Scheduler: Warmup'
elif current_step < half_steps:
print_label = 'Scheduler: Hold'
else:
print_label = 'Scheduler: Annealing'
if print_label != last_print_label:
print(print_label)
last_print_label = print_label
if current_step < num_warmup_steps:
return float(current_step) / float(max(1, num_warmup_steps))
if current_step < half_steps:
return 1.0
progress = float(current_step - half_steps) / float(max(1, num_training_steps - half_steps))
num_cycles = 0.5
return max(0.0, 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress)))
# raise up in cosine, then fall back in cosine
def _get_fp_cosine_raise_and_fall_lr_lambda(current_step: int, *, num_warmup_steps: int, num_training_steps: int, num_firstepoch_steps: int):
global last_print_label
print_label = ''
half_steps = num_training_steps//2
#num_warmup_steps = min(num_warmup_steps,half_steps)
if current_step < half_steps:
print_label = 'Scheduler: Raise'
else:
print_label = 'Scheduler: Fall'
if print_label != last_print_label:
print(print_label)
last_print_label = print_label
# linear
# return float(current_step) / float(max(1, num_warmup_steps))
progress = float(current_step - half_steps) / float(max(1, num_training_steps - half_steps))
num_cycles = 0.5
return max(0.0, 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress)))
# constant to the first epochs then cosine down to 0 over the rest epochs
def _get_fp_cosine_schedule_with_warmup_lr_lambda(current_step: int, *, num_warmup_steps: int, num_training_steps: int, num_firstepoch_steps: int):
global last_print_label
print_label = ''
num_warmup_steps = min(num_warmup_steps,num_firstepoch_steps)
if current_step < num_warmup_steps:
print_label = 'Scheduler: Warmup'
elif current_step < num_firstepoch_steps:
print_label = 'Scheduler: Hold'
else:
print_label = 'Scheduler: Annealing'
if print_label != last_print_label:
print(print_label)
last_print_label = print_label
if current_step < num_warmup_steps:
return float(current_step) / float(max(1, num_warmup_steps))
if current_step < num_firstepoch_steps:
return 1.0
progress = float(current_step - num_firstepoch_steps) / float(max(1, num_training_steps - num_firstepoch_steps))
num_cycles = 0.5
return max(0.0, 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress)))
# halve lr each epoch
def _get_fp_cdrop_rate_schedule_with_warmup_lr_lambda(current_step: int, *, num_warmup_steps: int, num_training_steps: int, num_firstepoch_steps: int):
global last_print_label
print_label = ''
num_warmup_steps = min(num_warmup_steps, num_firstepoch_steps)
current_epoch = (current_step // num_firstepoch_steps) + 1
if current_step < num_warmup_steps:
print_label = 'Scheduler: Warmup'
elif current_step < num_firstepoch_steps:
print_label = 'Scheduler: Hold'
else:
print_label = 'Scheduler: Drop Rate'
if print_label != last_print_label:
print(print_label)
last_print_label = print_label
if current_step < num_warmup_steps:
return float(current_step) / float(max(1, num_warmup_steps))
if current_step < num_firstepoch_steps:
return 1.0
# Compute the learning rate for the annealing phase
learning_rate = 1.0 / float(2 ** (current_epoch - 1))
return learning_rate
# epoch decay: 1/(1 + decay * epoch)
def custom_cosine_scheduler_with_warmup(optimizer, num_warmup_steps, num_training_steps, num_firstepoch_steps, last_epoch=-1):
"""
Args:
optimizer ([`~torch.optim.Optimizer`]):
The optimizer for which to schedule the learning rate.
num_warmup_steps (`int`):
The number of steps for the warmup phase.
num_training_steps (`int`):
The total number of training steps.
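        num_firstepoch_steps (`int`):
            The number of steps in the first epoch; the LR is held constant until this step, then cosine-annealed.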
last_epoch (`int`, *optional*, defaults to -1):
The index of the last epoch when resuming training.
Return:
`torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule.
"""
lr_lambda = partial(
_get_fp_cosine_schedule_with_warmup_lr_lambda,
num_warmup_steps=num_warmup_steps,
num_training_steps=num_training_steps,
num_firstepoch_steps = num_firstepoch_steps,
)
return LambdaLR(optimizer, lr_lambda, last_epoch)
def custom_half_scheduler_with_warmup(optimizer, num_warmup_steps, num_training_steps, num_firstepoch_steps, last_epoch=-1):
"""
Args:
optimizer ([`~torch.optim.Optimizer`]):
The optimizer for which to schedule the learning rate.
num_warmup_steps (`int`):
The number of steps for the warmup phase.
num_training_steps (`int`):
The total number of training steps.
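        num_firstepoch_steps (`int`):
            The number of steps in the first epoch (passed through for consistency; this scheduler holds for the first half of all steps instead).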
last_epoch (`int`, *optional*, defaults to -1):
The index of the last epoch when resuming training.
Return:
`torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule.
"""
lr_lambda = partial(
_get_fp_half_schedule_with_warmup_lr_lambda,
num_warmup_steps=num_warmup_steps,
num_training_steps=num_training_steps,
num_firstepoch_steps = num_firstepoch_steps,
)
return LambdaLR(optimizer, lr_lambda, last_epoch)
def custom_raise_fall_scheduler_with_warmup(optimizer, num_warmup_steps, num_training_steps, num_firstepoch_steps, last_epoch=-1):
"""
Args:
optimizer ([`~torch.optim.Optimizer`]):
The optimizer for which to schedule the learning rate.
num_warmup_steps (`int`):
The number of steps for the warmup phase.
num_training_steps (`int`):
The total number of training steps.
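        num_firstepoch_steps (`int`):
            The number of steps in the first epoch (accepted for API consistency; the raise/fall curve does not use it).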
last_epoch (`int`, *optional*, defaults to -1):
The index of the last epoch when resuming training.
Return:
`torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule.
"""
lr_lambda = partial(
_get_fp_cosine_raise_and_fall_lr_lambda,
num_warmup_steps=num_warmup_steps,
num_training_steps=num_training_steps,
num_firstepoch_steps = num_firstepoch_steps,
)
return LambdaLR(optimizer, lr_lambda, last_epoch)
def neftune_forward(self, input: torch.Tensor):
"""
Implements the NEFTune forward pass for the model. Note this works only for
torch.nn.Embedding layers. This method is slightly adapted from the original source code
that can be found here: https://github.com/neelsjain/NEFTune
Args:
input (`torch.Tensor`):
The input tensor to the model.
        noise_alpha (`float`, read from `self.neftune_noise_alpha`):
            The noise scale for the NEFTune forward pass; it is set on the embedding layer by the trainer rather than passed in as an argument.
"""
embeddings = torch.nn.functional.embedding(
input, self.weight, self.padding_idx, self.max_norm, self.norm_type, self.scale_grad_by_freq, self.sparse
)
if self.training:
# Add noise to the embeddings
dims = torch.tensor(embeddings.size(1) * embeddings.size(2))
mag_norm = self.neftune_noise_alpha / torch.sqrt(dims)
embeddings = embeddings + torch.zeros_like(embeddings).uniform_(-mag_norm, mag_norm)
return embeddings
class FPNEFtuneTrainer(transformers.Trainer):
def __init__(self,neftune_noise_alpha:float = 0.0, model = None, *args, **kwargs):
self.neftune_noise_alpha = neftune_noise_alpha
if self.neftune_noise_alpha > 0.0:
model = self._activate_neftune(model)
super().__init__(model = model, *args, **kwargs)
def _activate_neftune(self, model):
r"""
Activates the neftune as presented in this code: https://github.com/neelsjain/NEFTune and paper: https://arxiv.org/abs/2310.05914
"""
print(f"Activating {RED}NEFtune{RESET} with scale: {self.neftune_noise_alpha}")
if isinstance(model, transformers.PreTrainedModel):
embeddings = model.get_input_embeddings()
elif isinstance(model, PeftModel):
embeddings = model.base_model.get_input_embeddings()
embeddings.neftune_noise_alpha = self.neftune_noise_alpha
old_forward = embeddings.forward
# This hack seems to be needed to properly use a custom forward pass
# all credits to: https://discuss.pytorch.org/t/how-can-i-replace-the-forward-method-of-a-predefined-torchvision-model-with-my-customized-forward-function/54224/11
bound_method = neftune_forward.__get__(embeddings, embeddings.__class__)
setattr(embeddings, "forward", bound_method)
# embeddings.forward = neftune_forward
embeddings._trl_old_forward = old_forward
return model
def train(self, *args, **kwargs):
output = super().train(*args, **kwargs)
# After training we make sure to retrieve back the original forward pass method
# for the embedding layer
if self.neftune_noise_alpha is not None:
if isinstance(self.model, transformers.PreTrainedModel):
embeddings = self.model.get_input_embeddings()
elif isinstance(self.model, PeftModel):
embeddings = self.model.base_model.get_input_embeddings()
if hasattr(embeddings, "_trl_old_forward"):
embeddings.forward = embeddings._trl_old_forward
del embeddings._trl_old_forward
del embeddings.neftune_noise_alpha
return output
class FPSchedulerTrainer(transformers.Trainer):
def __init__(self,neftune_noise_alpha:float = 0.0, model = None, *args, **kwargs):
self.neftune_noise_alpha = neftune_noise_alpha
if self.neftune_noise_alpha > 0.0:
model = self._activate_neftune(model)
super().__init__(model = model, *args, **kwargs)
def _activate_neftune(self, model):
r"""
Activates the neftune as presented in this code: https://github.com/neelsjain/NEFTune and paper: https://arxiv.org/abs/2310.05914
"""
print(f"Activating {RED}NEFtune{RESET} with scale: {self.neftune_noise_alpha}")
if isinstance(model, transformers.PreTrainedModel):
embeddings = model.get_input_embeddings()
elif isinstance(model, PeftModel):
embeddings = model.base_model.get_input_embeddings()
embeddings.neftune_noise_alpha = self.neftune_noise_alpha
old_forward = embeddings.forward
# This hack seems to be needed to properly use a custom forward pass
# all credits to: https://discuss.pytorch.org/t/how-can-i-replace-the-forward-method-of-a-predefined-torchvision-model-with-my-customized-forward-function/54224/11
bound_method = neftune_forward.__get__(embeddings, embeddings.__class__)
setattr(embeddings, "forward", bound_method)
# embeddings.forward = neftune_forward
embeddings._trl_old_forward = old_forward
return model
def train(self, *args, **kwargs):
output = super().train(*args, **kwargs)
# After training we make sure to retrieve back the original forward pass method
# for the embedding layer
if self.neftune_noise_alpha is not None:
if isinstance(self.model, transformers.PreTrainedModel):
embeddings = self.model.get_input_embeddings()
elif isinstance(self.model, PeftModel):
embeddings = self.model.base_model.get_input_embeddings()
if hasattr(embeddings, "_trl_old_forward"):
embeddings.forward = embeddings._trl_old_forward
del embeddings._trl_old_forward
del embeddings.neftune_noise_alpha
return output
def create_scheduler(self, num_training_steps: int, optimizer: torch.optim.Optimizer = None):
#Setup the scheduler. The optimizer of the trainer must have been set up either before this method is called or passed as an argument.
num_train_epochs = self.args.num_train_epochs
num_warmup_steps=self.args.get_warmup_steps(num_training_steps)
num_firstepoch_steps = math.ceil(num_training_steps/num_train_epochs)
num_warmup_acc = num_warmup_steps*self.args.gradient_accumulation_steps
num_firstepoch_steps_acc = num_firstepoch_steps*self.args.gradient_accumulation_steps
num_training_steps_acc = num_training_steps*self.args.gradient_accumulation_steps
custom_scheduler_params.update({'dynamic_scheduler_stop': False})
print (f"Warm-up steps aligned to Gradient accumulation ({self.args.gradient_accumulation_steps}) = {num_warmup_acc} actual warmup steps")
if self.args.lr_scheduler_type == 'cosine':
num_warmup_acc_min = min(num_warmup_acc, num_firstepoch_steps_acc)
if num_warmup_acc>num_firstepoch_steps_acc:
print(f"\033[1;31;1mWARNING: The number of warmup steps is set too high! It will be clamped to 1 epoch, essentially going from warmup to annealing.\033[0;37;0m")
print (f"FP Scheduler Warmup: 0-[{num_warmup_acc_min}], Hold [{num_warmup_acc_min}]-{num_firstepoch_steps_acc}, Annealing {num_firstepoch_steps_acc}-{num_training_steps_acc}")
else:
print (f"FP Scheduler Warmup: 0-{num_warmup_acc_min}, Hold {num_warmup_acc_min}-{num_firstepoch_steps_acc}, Annealing {num_firstepoch_steps_acc}-{num_training_steps_acc}")
self.lr_scheduler = custom_cosine_scheduler_with_warmup(
optimizer=self.optimizer if optimizer is None else optimizer,
num_warmup_steps=num_warmup_steps,
num_training_steps=num_training_steps,
num_firstepoch_steps = num_firstepoch_steps,
)
self._created_lr_scheduler = True
return self.lr_scheduler
elif self.args.lr_scheduler_type == 'constant':
half_step_acc = num_training_steps_acc//2
num_warmup_acc_min = min(num_warmup_acc, half_step_acc)
if num_warmup_acc>half_step_acc:
print(f"\033[1;31;1mWARNING: The number of warmup steps is set too high! It will be clamped to half of all epochs, essentially going from warmup to annealing in the middle.\033[0;37;0m")
print (f"FP Scheduler Warmup: 0-[{num_warmup_acc_min}], Hold [{num_warmup_acc_min}]-{half_step_acc}, Annealing {half_step_acc}-{num_training_steps_acc}")
else:
print (f"FP Scheduler Warmup: 0-{num_warmup_acc_min}, Hold {num_warmup_acc_min}-{half_step_acc}, Annealing {half_step_acc}-{num_training_steps_acc}")
self.lr_scheduler = custom_half_scheduler_with_warmup(
optimizer=self.optimizer if optimizer is None else optimizer,
num_warmup_steps=num_warmup_steps,
num_training_steps=num_training_steps,
num_firstepoch_steps = num_firstepoch_steps,
)
self._created_lr_scheduler = True
return self.lr_scheduler
elif self.args.lr_scheduler_type == 'constant_with_warmup':
half_step_acc = num_training_steps_acc//2
if num_warmup_steps>0:
print(f"Warmup doesn't apply to this scheduler [Raise-Fall]")
print (f"Scheduler Raise: 0-{half_step_acc}, Fall {half_step_acc}-{num_training_steps_acc}")
self.lr_scheduler = custom_raise_fall_scheduler_with_warmup(
optimizer=self.optimizer if optimizer is None else optimizer,
num_warmup_steps=num_warmup_steps,
num_training_steps=num_training_steps,
num_firstepoch_steps = num_firstepoch_steps,
)
self._created_lr_scheduler = True
return self.lr_scheduler
else:
return super().create_scheduler(num_training_steps=num_training_steps, optimizer=optimizer)

@ -0,0 +1,62 @@
import os
import json
def create_graph(lora_path, lora_name):
try:
import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter
peft_model_path = f'{lora_path}/training_graph.json'
image_model_path = f'{lora_path}/training_graph.png'
# Check if the JSON file exists
if os.path.exists(peft_model_path):
# Load data from JSON file
with open(peft_model_path, 'r') as file:
data = json.load(file)
# Extract x, y1, and y2 values
x = [item['epoch'] for item in data]
y1 = [item['learning_rate'] for item in data]
y2 = [item['loss'] for item in data]
# Create the line chart
fig, ax1 = plt.subplots(figsize=(10, 6))
# Plot y1 (learning rate) on the first y-axis
ax1.plot(x, y1, 'b-', label='Learning Rate')
ax1.set_xlabel('Epoch')
ax1.set_ylabel('Learning Rate', color='b')
ax1.tick_params('y', colors='b')
# Create a second y-axis
ax2 = ax1.twinx()
# Plot y2 (loss) on the second y-axis
ax2.plot(x, y2, 'r-', label='Loss')
ax2.set_ylabel('Loss', color='r')
ax2.tick_params('y', colors='r')
# Set the y-axis formatter to display numbers in scientific notation
ax1.yaxis.set_major_formatter(ScalarFormatter(useMathText=True))
ax1.ticklabel_format(style='sci', axis='y', scilimits=(0,0))
# Add grid
ax1.grid(True)
# Combine the legends for both plots
lines, labels = ax1.get_legend_handles_labels()
lines2, labels2 = ax2.get_legend_handles_labels()
ax2.legend(lines + lines2, labels + labels2, loc='best')
# Set the title
plt.title(f'{lora_name} LR and Loss vs Epoch')
# Save the chart as an image
plt.savefig(image_model_path)
print(f"Graph saved in {image_model_path}")
else:
print(f"File 'training_graph.json' does not exist in the {lora_path}")
except ImportError:
print("matplotlib is not installed. Please install matplotlib to create PNG graphs")

File diff suppressed because it is too large

@ -0,0 +1,368 @@
import os
from modules import shared, utils
from pathlib import Path
import requests
import tqdm
import json
'''
def get_gpu_memory_usage(rank):
return {
'total': round(torch.cuda.get_device_properties(rank).total_memory / (1024**3), 2),
'max': round(torch.cuda.max_memory_allocated(rank) / (1024**3), 2),
'reserved': round(torch.cuda.memory_reserved(rank) / (1024**3), 2),
'allocated': round(torch.cuda.memory_allocated(rank) / (1024**3), 2)
}
'''
def list_subfoldersByTime(directory):
if not directory.endswith('/'):
directory += '/'
subfolders = []
subfolders.append('None')
path = directory
name_list = os.listdir(path)
full_list = [os.path.join(path,i) for i in name_list]
time_sorted_list = sorted(full_list, key=os.path.getmtime,reverse=True)
for entry in time_sorted_list:
if os.path.isdir(entry):
entry_str = f"{entry}" # Convert entry to a string
full_path = entry_str
entry_str = entry_str.replace('\\','/')
entry_str = entry_str.replace(f"{directory}", "") # Remove directory part
subfolders.append(entry_str)
return subfolders
def get_available_loras_local(_sortedByTime):
model_dir = shared.args.lora_dir # Update with the appropriate directory path
subfolders = []
if _sortedByTime:
subfolders = list_subfoldersByTime(model_dir)
else:
subfolders = utils.get_available_loras()
return subfolders
# FPHAM SPLIT BY SENTENCE BLOCK ===============
def split_sentences(text: str, cutoff_len: int):
sentences = []
sentence = ''
delimiters = ['. ', '? ', '! ', '... ', '.\n', '?\n', '!\n','...\n','</s>','<//>']
abbreviations = ['Mr. ', 'Mrs. ', 'Dr. ', 'Ms. ', 'St. ', 'Prof. ', 'Jr. ', 'Ltd. ', 'Capt. ', 'Col. ', 'Gen. ', 'Ave. ', 'Blvd. ', 'Co. ', 'Corp. ', 'Dept. ', 'Est. ', 'Gov. ', 'Inc. ', 'Ph.D. ', 'Univ. ']
errors = 0
max_cut = cutoff_len-1
prev_char = ''
for char in text:
sentence += char
if (any(sentence.endswith(delimiter) for delimiter in delimiters) and
not (prev_char.isupper() and len(sentence) >= 3 and sentence[-3] != ' ') and
not any(sentence.endswith(abbreviation) for abbreviation in abbreviations)):
tokens = shared.tokenizer.encode(sentence)
if len(tokens) > max_cut:
tokens = tokens[:max_cut]
sentence = shared.tokenizer.decode(tokens, skip_special_tokens=True)
errors = errors + 1
sentences.append({'text': sentence, 'size': len(tokens)})
sentence = ''
prev_char = char
if sentence:
tokens = shared.tokenizer.encode(sentence)
if len(tokens) > max_cut:
tokens = tokens[:max_cut]
sentence = shared.tokenizer.decode(tokens, skip_special_tokens=True)
errors = errors + 1
sentences.append({'text': sentence, 'size': len(tokens)})
if errors > 0:
print(f"Trimmed sentences beyond Cutoff Length: {errors}")
return sentences
# The goal of the following code is to create blocks of text + overlapping blocks while:
# - respecting sentence boundaries
# - always using all the text
# - a hard cut (defined by hard_cut_string or </s>) always ends at the end of a data block
# - no overlapping blocks are created across a hard cut or across a </s> token
def precise_cut(text: str, overlap: bool, min_chars_cut: int, eos_to_hc: bool, cutoff_len: int, hard_cut_string: str, debug_slicer:bool):
EOSX_str = '<//>' #hardcut placeholder
EOS_str = '</s>'
print("Precise raw text slicer: ON")
cut_string = hard_cut_string.replace('\\n', '\n')
text = text.replace(cut_string, EOSX_str)
sentences = split_sentences(text, cutoff_len)
print(f"Sentences: {len(sentences)}")
sentencelist = []
currentSentence = ''
totalLength = 0
max_cut = cutoff_len-1
half_cut = cutoff_len//2
halfcut_length = 0
edgeindex = []
half_index = 0
for index, item in enumerate(sentences):
if halfcut_length+ item['size'] < half_cut:
halfcut_length += item['size']
half_index = index
else:
edgeindex.append(half_index)
halfcut_length = -2 * max_cut
if totalLength + item['size'] < max_cut and not currentSentence.endswith(EOSX_str):
currentSentence += item['text']
totalLength += item['size']
else:
if len(currentSentence.strip()) > min_chars_cut:
sentencelist.append(currentSentence.strip())
currentSentence = item['text']
totalLength = item['size']
halfcut_length = item['size']
if len(currentSentence.strip()) > min_chars_cut:
sentencelist.append(currentSentence.strip())
unique_blocks = len(sentencelist)
print(f"Text Blocks: {unique_blocks}")
#overlap strategies:
# don't overlap across HARD CUT (EOSX)
if overlap:
for edge_idx in edgeindex:
currentSentence = ''
totalLength = 0
for item in sentences[edge_idx:]:
if totalLength + item['size'] < max_cut:
currentSentence += item['text']
totalLength += item['size']
else:
#if by chance EOSX is at the end then it's acceptable
if currentSentence.endswith(EOSX_str) and len(currentSentence.strip()) > min_chars_cut:
sentencelist.append(currentSentence.strip())
# otherwise don't cross hard cut
elif EOSX_str not in currentSentence and len(currentSentence.strip()) > min_chars_cut:
sentencelist.append(currentSentence.strip())
currentSentence = ''
totalLength = 0
break
print(f"+ Overlapping blocks: {len(sentencelist)-unique_blocks}")
num_EOS = 0
for i in range(len(sentencelist)):
if eos_to_hc:
sentencelist[i] = sentencelist[i].replace(EOSX_str, EOS_str)
else:
sentencelist[i] = sentencelist[i].replace(EOSX_str, '')
#someone may have had stop strings in the raw text...
sentencelist[i] = sentencelist[i].replace("</s></s>", EOS_str)
num_EOS += sentencelist[i].count(EOS_str)
if num_EOS > 0:
print(f"+ EOS count: {num_EOS}")
#final check for useless lines
sentencelist = [item for item in sentencelist if item.strip() != "</s>"]
sentencelist = [item for item in sentencelist if item.strip() != ""]
if debug_slicer:
# Write the log file
Path('logs').mkdir(exist_ok=True)
sentencelist_dict = {index: sentence for index, sentence in enumerate(sentencelist)}
output_file = "logs/sentencelist.json"
with open(output_file, 'w') as f:
json.dump(sentencelist_dict, f,indent=2)
print("Saved sentencelist.json in logs folder")
return sentencelist
def sliding_block_cut(text: str, min_chars_cut: int, eos_to_hc: bool, cutoff_len: int, hard_cut_string: str, debug_slicer:bool):
EOSX_str = '<//>' #hardcut placeholder
EOS_str = '</s>'
print("Mega Block Overlap: ON")
cut_string = hard_cut_string.replace('\\n', '\n')
text = text.replace(cut_string, EOSX_str)
sentences = split_sentences(text, cutoff_len)
print(f"Sentences: {len(sentences)}")
sentencelist = []
max_cut = cutoff_len-1
#print(f"max_cut: {max_cut}")
advancing_to = 0
prev_block_lastsentence = ""
for i in range(len(sentences)):
totalLength = 0
currentSentence = ''
lastsentence = ""
if i >= advancing_to:
for k in range(i, len(sentences)):
current_length = sentences[k]['size']
if totalLength + current_length <= max_cut and not currentSentence.endswith(EOSX_str):
currentSentence += sentences[k]['text']
totalLength += current_length
lastsentence = sentences[k]['text']
else:
if len(currentSentence.strip()) > min_chars_cut:
if prev_block_lastsentence!=lastsentence:
sentencelist.append(currentSentence.strip())
prev_block_lastsentence = lastsentence
advancing_to = 0
if currentSentence.endswith(EOSX_str):
advancing_to = k
currentSentence = ""
totalLength = 0
break
if currentSentence != "":
if len(currentSentence.strip()) > min_chars_cut:
sentencelist.append(currentSentence.strip())
unique_blocks = len(sentencelist)
print(f"Text Blocks: {unique_blocks}")
num_EOS = 0
for i in range(len(sentencelist)):
if eos_to_hc:
sentencelist[i] = sentencelist[i].replace(EOSX_str, EOS_str)
else:
sentencelist[i] = sentencelist[i].replace(EOSX_str, '')
#someone may have had stop strings in the raw text...
sentencelist[i] = sentencelist[i].replace("</s></s>", EOS_str)
num_EOS += sentencelist[i].count(EOS_str)
if num_EOS > 0:
print(f"+ EOS count: {num_EOS}")
#final check for useless lines
sentencelist = [item for item in sentencelist if item.strip() != "</s>"]
sentencelist = [item for item in sentencelist if item.strip() != ""]
if debug_slicer:
# Write the log file
Path('logs').mkdir(exist_ok=True)
sentencelist_dict = {index: sentence for index, sentence in enumerate(sentencelist)}
output_file = "logs/sentencelist.json"
with open(output_file, 'w') as f:
json.dump(sentencelist_dict, f,indent=2)
print("Saved sentencelist.json in logs folder")
return sentencelist
# Example usage:
# download_file_from_url('https://example.com/path/to/your/file.ext', '/output/directory')
def download_file_from_url(url, overwrite, output_dir_in, valid_extensions = {'.txt', '.json'}):
try:
# Validate and sanitize the URL
#parsed_url = urllib.parse.urlparse(url)
#if not parsed_url.netloc:
# raise ValueError("Invalid URL")
#filename = os.path.basename(parsed_url.path)
# Get the filename from the URL
session = requests.Session()
headers = {}
mode = 'wb'
filename = url.split('/')[-1]
output_dir = str(output_dir_in)
# Construct the full path to the output file
local_filename = os.path.join(output_dir, filename)
# Check if the local file already exists
overw = ''
if os.path.exists(local_filename):
if not overwrite:
yield f"File '{local_filename}' already exists. Aborting."
return
else:
overw = ' [Overwrite existing]'
filename_lower = filename.lower()
# Send an HTTP GET request to the URL with a timeout
file_extension = os.path.splitext(filename_lower)[-1]
if file_extension not in valid_extensions:
yield f"Invalid file extension: {file_extension}. Only {valid_extensions} files are supported."
return
with session.get(url, stream=True, headers=headers, timeout=10) as r:
r.raise_for_status()
# total size can be wildly inaccurate
#total_size = int(r.headers.get('content-length', 0))
block_size = 1024 * 4
with open(local_filename, mode) as f:
count = 0
for data in r.iter_content(block_size):
f.write(data)
count += len(data)
yield f"Downloaded: {count} " + overw
# Verify file size if possible
if os.path.exists(local_filename):
downloaded_size = os.path.getsize(local_filename)
if downloaded_size > 0:
yield f"File '{filename}' downloaded to '{output_dir}' ({downloaded_size} bytes)."
print("File Downloaded")
else:
print("Downloaded file is zero")
yield f"Failed. Downloaded file size is zero)."
else:
print(f"Error: {local_filename} failed to download.")
yield f"Error: {local_filename} failed to download"
except Exception as e:
print(f"An error occurred: {e}")
yield f"An error occurred: {e}"
finally:
# Close the session to release resources
session.close()

@ -0,0 +1,83 @@
import os
import gradio as gr
# get the current directory of the script
current_dir = os.path.dirname(os.path.abspath(__file__))
# check if the bias_options.txt file exists, if not, create it
bias_file = os.path.join(current_dir, "bias_options.txt")
if not os.path.isfile(bias_file):
with open(bias_file, "w") as f:
f.write("*I am so happy*\n*I am so sad*\n*I am so excited*\n*I am so bored*\n*I am so angry*")
# read bias options from the text file
with open(bias_file, "r") as f:
bias_options = [line.strip() for line in f.readlines()]
params = {
"activate": True,
"bias string": " *I am so happy*",
"use custom string": False,
}
def input_modifier(string):
"""
This function is applied to your text inputs before
they are fed into the model.
"""
return string
def output_modifier(string):
"""
This function is applied to the model outputs.
"""
return string
def bot_prefix_modifier(string):
"""
This function is only applied in chat mode. It modifies
the prefix text for the Bot and can be used to bias its
behavior.
"""
if params['activate']:
if params['use custom string']:
return f'{string} {params["custom string"].strip()} '
else:
return f'{string} {params["bias string"].strip()} '
else:
return string
def ui():
# Gradio elements
activate = gr.Checkbox(value=params['activate'], label='Activate character bias')
dropdown_string = gr.Dropdown(choices=bias_options, value=params["bias string"], label='Character bias', info='To edit the options in this dropdown edit the "bias_options.txt" file')
use_custom_string = gr.Checkbox(value=False, label='Use custom bias textbox instead of dropdown')
custom_string = gr.Textbox(value="", placeholder="Enter custom bias string", label="Custom Character Bias", info='To use this textbox activate the checkbox above')
# Event functions to update the parameters in the backend
def update_bias_string(x):
if x:
params.update({"bias string": x})
else:
params.update({"bias string": dropdown_string.get()})
return x
def update_custom_string(x):
params.update({"custom string": x})
dropdown_string.change(update_bias_string, dropdown_string, None)
custom_string.change(update_custom_string, custom_string, None)
activate.change(lambda x: params.update({"activate": x}), activate, None)
use_custom_string.change(lambda x: params.update({"use custom string": x}), use_custom_string, None)
# Group elements together depending on the selected option
def bias_string_group():
if use_custom_string.value:
return gr.Group([use_custom_string, custom_string])
else:
return dropdown_string

@ -0,0 +1,197 @@
import html
import re
from pathlib import Path
import elevenlabs
import gradio as gr
from modules import chat, shared, ui_chat
from modules.logging_colors import logger
from modules.utils import gradio
params = {
'activate': True,
'api_key': None,
'selected_voice': 'None',
'autoplay': False,
'show_text': True,
'model': 'eleven_monolingual_v1',
}
voices = None
wav_idx = 0
LANG_MODELS = ['eleven_monolingual_v1', 'eleven_multilingual_v1']
def update_api_key(key):
params['api_key'] = key
if key is not None:
elevenlabs.set_api_key(key)
def refresh_voices():
global params
your_voices = elevenlabs.voices()
voice_names = [voice.name for voice in your_voices]
return voice_names
def refresh_voices_dd():
all_voices = refresh_voices()
return gr.Dropdown.update(value=all_voices[0], choices=all_voices)
def remove_tts_from_history(history):
for i, entry in enumerate(history['internal']):
history['visible'][i] = [history['visible'][i][0], entry[1]]
return history
def toggle_text_in_history(history):
for i, entry in enumerate(history['visible']):
visible_reply = entry[1]
if visible_reply.startswith('<audio'):
if params['show_text']:
reply = history['internal'][i][1]
history['visible'][i] = [history['visible'][i][0], f"{visible_reply.split('</audio>')[0]}</audio>\n\n{reply}"]
else:
history['visible'][i] = [history['visible'][i][0], f"{visible_reply.split('</audio>')[0]}</audio>"]
return history
def remove_surrounded_chars(string):
# this expression matches to 'as few symbols as possible (0 upwards) between any asterisks' OR
# 'as few symbols as possible (0 upwards) between an asterisk and the end of the string'
    return re.sub(r'\*[^\*]*?(\*|$)', '', string)
def state_modifier(state):
if not params['activate']:
return state
state['stream'] = False
return state
def input_modifier(string):
if not params['activate']:
return string
shared.processing_message = "*Is recording a voice message...*"
return string
def history_modifier(history):
# Remove autoplay from the last reply
if len(history['internal']) > 0:
history['visible'][-1] = [
history['visible'][-1][0],
history['visible'][-1][1].replace('controls autoplay>', 'controls>')
]
return history
def output_modifier(string):
global params, wav_idx
if not params['activate']:
return string
original_string = string
string = remove_surrounded_chars(string)
string = string.replace('"', '')
    string = string.replace('“', '')
string = string.replace('\n', ' ')
string = string.strip()
if string == '':
string = 'empty reply, try regenerating'
    output_file = Path(f'extensions/elevenlabs_tts/outputs/{wav_idx:06d}.mp3')
print(f'Outputting audio to {str(output_file)}')
try:
audio = elevenlabs.generate(text=html.unescape(string), voice=params['selected_voice'], model=params['model'])
elevenlabs.save(audio, str(output_file))
autoplay = 'autoplay' if params['autoplay'] else ''
string = f'<audio src="file/{output_file.as_posix()}" controls {autoplay}></audio>'
wav_idx += 1
except elevenlabs.api.error.UnauthenticatedRateLimitError:
string = "🤖 ElevenLabs Unauthenticated Rate Limit Reached - Please create an API key to continue\n\n"
except elevenlabs.api.error.RateLimitError:
string = "🤖 ElevenLabs API Tier Limit Reached\n\n"
except elevenlabs.api.error.APIError as err:
string = f"🤖 ElevenLabs Error: {err}\n\n"
if params['show_text']:
string += f'\n\n{original_string}'
shared.processing_message = "*Is typing...*"
return string
def ui():
global voices
if not voices:
voices = refresh_voices()
selected = params['selected_voice']
if selected == 'None':
params['selected_voice'] = voices[0]
elif selected not in voices:
logger.error(f'Selected voice {selected} not available, switching to {voices[0]}')
params['selected_voice'] = voices[0]
# Gradio elements
with gr.Row():
activate = gr.Checkbox(value=params['activate'], label='Activate TTS')
autoplay = gr.Checkbox(value=params['autoplay'], label='Play TTS automatically')
show_text = gr.Checkbox(value=params['show_text'], label='Show message text under audio player')
with gr.Row():
voice = gr.Dropdown(value=params['selected_voice'], choices=voices, label='TTS Voice')
refresh = gr.Button(value='Refresh')
with gr.Row():
if params['api_key']:
api_key = gr.Textbox(value=params['api_key'], label='API Key')
update_api_key(params['api_key'])
else:
api_key = gr.Textbox(placeholder="Enter your API key.", label='API Key')
with gr.Row():
model = gr.Dropdown(value=params['model'], choices=LANG_MODELS, label='Language model')
with gr.Row():
convert = gr.Button('Permanently replace audios with the message texts')
convert_cancel = gr.Button('Cancel', visible=False)
convert_confirm = gr.Button('Confirm (cannot be undone)', variant="stop", visible=False)
# Convert history with confirmation
convert_arr = [convert_confirm, convert, convert_cancel]
convert.click(lambda: [gr.update(visible=True), gr.update(visible=False), gr.update(visible=True)], None, convert_arr)
convert_confirm.click(
lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, convert_arr).then(
remove_tts_from_history, gradio('history'), gradio('history')).then(
chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then(
chat.redraw_html, gradio(ui_chat.reload_arr), gradio('display'))
convert_cancel.click(lambda: [gr.update(visible=False), gr.update(visible=True), gr.update(visible=False)], None, convert_arr)
# Toggle message text in history
show_text.change(
lambda x: params.update({"show_text": x}), show_text, None).then(
toggle_text_in_history, gradio('history'), gradio('history')).then(
chat.save_history, gradio('history', 'unique_id', 'character_menu', 'mode'), None).then(
chat.redraw_html, gradio(ui_chat.reload_arr), gradio('display'))
# Event functions to update the parameters in the backend
activate.change(lambda x: params.update({'activate': x}), activate, None)
voice.change(lambda x: params.update({'selected_voice': x}), voice, None)
api_key.change(update_api_key, api_key, None)
model.change(lambda x: params.update({'model': x}), model, None)
# connect.click(check_valid_api, [], connection_status)
refresh.click(refresh_voices_dd, [], voice)
# Event functions to update the parameters in the backend
autoplay.change(lambda x: params.update({"autoplay": x}), autoplay, None)

@ -0,0 +1,139 @@
"""
An example of extension. It does nothing, but you can add transformations
before the return statements to customize the webui behavior.
Starting from history_modifier and ending in output_modifier, the
functions are declared in the same order that they are called at
generation time.
"""
import gradio as gr
import torch
from transformers import LogitsProcessor
from modules import chat, shared
from modules.text_generation import (
decode,
encode,
generate_reply,
)
params = {
"display_name": "Example Extension",
"is_tab": False,
}
class MyLogits(LogitsProcessor):
"""
Manipulates the probabilities for the next token before it gets sampled.
Used in the logits_processor_modifier function below.
"""
def __init__(self):
pass
def __call__(self, input_ids, scores):
# probs = torch.softmax(scores, dim=-1, dtype=torch.float)
# probs[0] /= probs[0].sum()
# scores = torch.log(probs / (1 - probs))
return scores
def history_modifier(history):
"""
Modifies the chat history.
Only used in chat mode.
"""
return history
def state_modifier(state):
"""
Modifies the state variable, which is a dictionary containing the input
values in the UI like sliders and checkboxes.
"""
return state
def chat_input_modifier(text, visible_text, state):
"""
Modifies the user input string in chat mode (visible_text).
You can also modify the internal representation of the user
input (text) to change how it will appear in the prompt.
"""
return text, visible_text
def input_modifier(string, state, is_chat=False):
"""
In default/notebook modes, modifies the whole prompt.
In chat mode, it is the same as chat_input_modifier but only applied
to "text", here called "string", and not to "visible_text".
"""
return string
def bot_prefix_modifier(string, state):
"""
Modifies the prefix for the next bot reply in chat mode.
By default, the prefix will be something like "Bot Name:".
"""
return string
def tokenizer_modifier(state, prompt, input_ids, input_embeds):
"""
Modifies the input ids and embeds.
Used by the multimodal extension to put image embeddings in the prompt.
Only used by loaders that use the transformers library for sampling.
"""
return prompt, input_ids, input_embeds
def logits_processor_modifier(processor_list, input_ids):
"""
Adds logits processors to the list, allowing you to access and modify
the next token probabilities.
Only used by loaders that use the transformers library for sampling.
"""
processor_list.append(MyLogits())
return processor_list
def output_modifier(string, state, is_chat=False):
"""
Modifies the LLM output before it gets presented.
In chat mode, the modified version goes into history['visible'],
and the original version goes into history['internal'].
"""
return string
def custom_generate_chat_prompt(user_input, state, **kwargs):
"""
Replaces the function that generates the prompt from the chat history.
Only used in chat mode.
"""
result = chat.generate_chat_prompt(user_input, state, **kwargs)
return result
def custom_css():
"""
Returns a CSS string that gets appended to the CSS for the webui.
"""
return ''
def custom_js():
"""
Returns a javascript string that gets appended to the javascript
for the webui.
"""
return ''
def setup():
"""
Gets executed only once, when the extension is imported.
"""
pass
def ui():
"""
Gets executed when the UI is drawn. Custom gradio elements and
their corresponding event handlers should be defined here.
To learn about gradio components, check out the docs:
https://gradio.app/docs/
"""
pass

@ -0,0 +1,33 @@
let gallery_element = document.getElementById('gallery-extension');
let chat_mode_element = document.getElementById('chat-mode');
let extensions_block = document.getElementById('extensions');
let extensions_block_size = extensions_block.childNodes.length;
let gallery_only = (extensions_block_size == 5);
document.querySelector('.header_bar').addEventListener('click', function(event) {
if (event.target.tagName === 'BUTTON') {
const buttonText = event.target.textContent.trim();
let chat_visible = (buttonText == 'Chat');
let default_visible = (buttonText == 'Default');
let notebook_visible = (buttonText == 'Notebook');
let chat_mode_visible = (chat_mode_element.offsetHeight > 0 && chat_mode_element.offsetWidth > 0);
// Only show this extension in the Chat tab
if (chat_visible) {
if (chat_mode_visible) {
gallery_element.style.display = 'block';
extensions_block.style.display = '';
} else {
gallery_element.style.display = 'none';
extensions_block.style.display = 'none';
}
} else {
gallery_element.style.display = 'none';
if (gallery_only) {
extensions_block.style.display = 'none';
}
}
}
});

@ -0,0 +1,101 @@
from pathlib import Path
import gradio as gr
from modules.html_generator import get_image_cache
from modules.shared import gradio
def generate_css():
css = """
.character-gallery > .gallery {
margin: 1rem 0;
display: grid !important;
grid-template-columns: repeat(auto-fit, minmax(150px, 1fr));
grid-column-gap: 0.4rem;
grid-row-gap: 1.2rem;
}
.character-gallery > .label {
display: none !important;
}
.character-gallery button.gallery-item {
display: contents;
}
.character-container {
cursor: pointer;
text-align: center;
position: relative;
opacity: 0.85;
}
.character-container:hover {
opacity: 1;
}
.character-container .placeholder, .character-container img {
width: 150px;
height: 200px;
background-color: gray;
object-fit: cover;
margin: 0 auto;
border-radius: 1rem;
border: 3px solid white;
box-shadow: 3px 3px 6px 0px rgb(0 0 0 / 50%);
}
.character-name {
margin-top: 0.3rem;
display: block;
font-size: 1.2rem;
font-weight: 600;
overflow-wrap: anywhere;
}
"""
return css
def generate_html():
cards = []
# Iterate through files in image folder
for file in sorted(Path("characters").glob("*")):
if file.suffix in [".json", ".yml", ".yaml"]:
character = file.stem
container_html = '<div class="character-container">'
image_html = "<div class='placeholder'></div>"
for path in [Path(f"characters/{character}.{extension}") for extension in ['png', 'jpg', 'jpeg']]:
if path.exists():
image_html = f'<img src="file/{get_image_cache(path)}">'
break
container_html += f'{image_html} <span class="character-name">{character}</span>'
container_html += "</div>"
cards.append([container_html, character])
return cards
def select_character(evt: gr.SelectData):
return (evt.value[1])
def custom_js():
path_to_js = Path(__file__).parent.resolve() / 'script.js'
return open(path_to_js, 'r').read()
def ui():
with gr.Accordion("Character gallery", open=False, elem_id='gallery-extension'):
update = gr.Button("Refresh")
gr.HTML(value="<style>" + generate_css() + "</style>")
gallery = gr.Dataset(components=[gr.HTML(visible=False)],
label="",
samples=generate_html(),
elem_classes=["character-gallery"],
samples_per_page=50
)
update.click(generate_html, [], gallery)
gallery.select(select_character, None, gradio['character_menu'])

@ -0,0 +1,59 @@
import html
import gradio as gr
from deep_translator import GoogleTranslator
params = {
"activate": True,
"language string": "ja",
}
language_codes = {'Afrikaans': 'af', 'Albanian': 'sq', 'Amharic': 'am', 'Arabic': 'ar', 'Armenian': 'hy', 'Azerbaijani': 'az', 'Basque': 'eu', 'Belarusian': 'be', 'Bengali': 'bn', 'Bosnian': 'bs', 'Bulgarian': 'bg', 'Catalan': 'ca', 'Cebuano': 'ceb', 'Chinese (Simplified)': 'zh-CN', 'Chinese (Traditional)': 'zh-TW', 'Corsican': 'co', 'Croatian': 'hr', 'Czech': 'cs', 'Danish': 'da', 'Dutch': 'nl', 'English': 'en', 'Esperanto': 'eo', 'Estonian': 'et', 'Finnish': 'fi', 'French': 'fr', 'Frisian': 'fy', 'Galician': 'gl', 'Georgian': 'ka', 'German': 'de', 'Greek': 'el', 'Gujarati': 'gu', 'Haitian Creole': 'ht', 'Hausa': 'ha', 'Hawaiian': 'haw', 'Hebrew': 'iw', 'Hindi': 'hi', 'Hmong': 'hmn', 'Hungarian': 'hu', 'Icelandic': 'is', 'Igbo': 'ig', 'Indonesian': 'id', 'Irish': 'ga', 'Italian': 'it', 'Japanese': 'ja', 'Javanese': 'jw', 'Kannada': 'kn', 'Kazakh': 'kk', 'Khmer': 'km', 'Korean': 'ko', 'Kurdish': 'ku', 'Kyrgyz': 'ky', 'Lao': 'lo', 'Latin': 'la', 'Latvian': 'lv', 'Lithuanian': 'lt', 'Luxembourgish': 'lb', 'Macedonian': 'mk', 'Malagasy': 'mg', 'Malay': 'ms', 'Malayalam': 'ml', 'Maltese': 'mt', 'Maori': 'mi', 'Marathi': 'mr', 'Mongolian': 'mn', 'Myanmar (Burmese)': 'my', 'Nepali': 'ne', 'Norwegian': 'no', 'Nyanja (Chichewa)': 'ny', 'Pashto': 'ps', 'Persian': 'fa', 'Polish': 'pl', 'Portuguese (Portugal, Brazil)': 'pt', 'Punjabi': 'pa', 'Romanian': 'ro', 'Russian': 'ru', 'Samoan': 'sm', 'Scots Gaelic': 'gd', 'Serbian': 'sr', 'Sesotho': 'st', 'Shona': 'sn', 'Sindhi': 'sd', 'Sinhala (Sinhalese)': 'si', 'Slovak': 'sk', 'Slovenian': 'sl', 'Somali': 'so', 'Spanish': 'es', 'Sundanese': 'su', 'Swahili': 'sw', 'Swedish': 'sv', 'Tagalog (Filipino)': 'tl', 'Tajik': 'tg', 'Tamil': 'ta', 'Telugu': 'te', 'Thai': 'th', 'Turkish': 'tr', 'Ukrainian': 'uk', 'Urdu': 'ur', 'Uzbek': 'uz', 'Vietnamese': 'vi', 'Welsh': 'cy', 'Xhosa': 'xh', 'Yiddish': 'yi', 'Yoruba': 'yo', 'Zulu': 'zu'}
def input_modifier(string):
"""
This function is applied to your text inputs before
they are fed into the model.
"""
if not params['activate']:
return string
return GoogleTranslator(source=params['language string'], target='en').translate(string)
def output_modifier(string):
"""
This function is applied to the model outputs.
"""
if not params['activate']:
return string
translated_str = GoogleTranslator(source='en', target=params['language string']).translate(html.unescape(string))
return html.escape(translated_str)
def bot_prefix_modifier(string):
"""
This function is only applied in chat mode. It modifies
the prefix text for the Bot and can be used to bias its
behavior.
"""
return string
def ui():
# Finding the language name from the language code to use as the default value
language_name = list(language_codes.keys())[list(language_codes.values()).index(params['language string'])]
# Gradio elements
with gr.Row():
activate = gr.Checkbox(value=params['activate'], label='Activate translation')
with gr.Row():
language = gr.Dropdown(value=language_name, choices=[k for k in language_codes], label='Language')
# Event functions to update the parameters in the backend
activate.change(lambda x: params.update({"activate": x}), activate, None)
language.change(lambda x: params.update({"language string": language_codes[x]}), language, None)

@ -0,0 +1,143 @@
import torch
from modules import chat, shared
from modules.text_generation import (
decode,
encode,
generate_reply,
)
from transformers import LogitsProcessor
import gradio as gr
params = {
"display_name": "Long replies",
"is_tab": False,
"min_length": 120,
}
initial_size = 0
class MyLogits(LogitsProcessor):
"""
Manipulates the probabilities for the next token before it gets sampled.
Used in the logits_processor_modifier function below.
"""
def __init__(self):
self.newline_id = shared.tokenizer.encode('\n')[-1]
pass
def __call__(self, input_ids, scores):
if input_ids.shape[-1] - initial_size < params["min_length"]:
scores[...,self.newline_id] = -1000
# scores[...,shared.tokenizer.eos_token_id] = -1000
# probs = torch.softmax(scores, dim=-1, dtype=torch.float)
# probs[0] /= probs[0].sum()
# scores = torch.log(probs / (1 - probs))
return scores
def history_modifier(history):
"""
Modifies the chat history.
Only used in chat mode.
"""
return history
def state_modifier(state):
"""
Modifies the state variable, which is a dictionary containing the input
values in the UI like sliders and checkboxes.
"""
return state
def chat_input_modifier(text, visible_text, state):
"""
Modifies the user input string in chat mode (visible_text).
You can also modify the internal representation of the user
input (text) to change how it will appear in the prompt.
"""
return text, visible_text
def input_modifier(string, state):
"""
In default/notebook modes, modifies the whole prompt.
In chat mode, it is the same as chat_input_modifier but only applied
to "text", here called "string", and not to "visible_text".
"""
return string
def bot_prefix_modifier(string, state):
"""
Modifies the prefix for the next bot reply in chat mode.
By default, the prefix will be something like "Bot Name:".
"""
return string
def tokenizer_modifier(state, prompt, input_ids, input_embeds):
"""
Modifies the input ids and embeds.
Used by the multimodal extension to put image embeddings in the prompt.
Only used by loaders that use the transformers library for sampling.
"""
global initial_size
initial_size = input_ids.shape[-1]
return prompt, input_ids, input_embeds
def logits_processor_modifier(processor_list, input_ids):
"""
Adds logits processors to the list, allowing you to access and modify
the next token probabilities.
Only used by loaders that use the transformers library for sampling.
"""
processor_list.append(MyLogits())
return processor_list
def output_modifier(string, state):
"""
Modifies the LLM output before it gets presented.
In chat mode, the modified version goes into history['visible'],
and the original version goes into history['internal'].
"""
return string
def custom_generate_chat_prompt(user_input, state, **kwargs):
"""
Replaces the function that generates the prompt from the chat history.
Only used in chat mode.
"""
result = chat.generate_chat_prompt(user_input, state, **kwargs)
return result
def custom_css():
"""
Returns a CSS string that gets appended to the CSS for the webui.
"""
return ''
def custom_js():
"""
Returns a javascript string that gets appended to the javascript
for the webui.
"""
return ''
def setup():
"""
Gets executed only once, when the extension is imported.
"""
pass
def ui():
"""
Gets executed when the UI is drawn. Custom gradio elements and
their corresponding event handlers should be defined here.
To learn about gradio components, check out the docs:
https://gradio.app/docs/
"""
min_length = gr.Slider(0, 800, step=10, value=params['min_length'], label='Minimum reply length')
min_length.change(lambda x: params.update({'min_length': x}), min_length, None)

@ -0,0 +1,85 @@
# Technical description of multimodal extension
## Working principle
The multimodal extension does most of the work required for any image input:
- adds the UI
- saves the images as base64 JPEGs to history
- provides the hooks to the UI
- if there are images in the prompt, it:
- splits the prompt to text and image parts
- adds image start/end markers to text parts, then encodes and embeds the text parts
- calls the vision pipeline to embed the images
- stitches the embeddings together, and returns them to text generation
- loads the appropriate vision pipeline, selected either from the model name or by the `--multimodal-pipeline` parameter
Now, for the pipelines, they:
- load the required vision models
- return some constants, for example the number of tokens taken up by an image
- and most importantly: return the embeddings for the LLM, given a list of images
## Prompts/history
To save images in the prompt/history, this extension uses a base64 JPEG wrapped in an HTML tag, like so:
```
<img src="data:image/jpeg;base64,{img_str}">
```
where `{img_str}` is the actual image data. This format makes displaying the images in the UI essentially free. Do note that the format must match exactly; the regex used to find the images is: `<img src="data:image/jpeg;base64,([A-Za-z0-9+/=]+)">`.
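For instance, a short sketch (assuming Pillow is available) that pulls the embedded images back out of a saved prompt with that exact regex:

```
import base64
import io
import re

from PIL import Image

IMG_RE = re.compile(r'<img src="data:image/jpeg;base64,([A-Za-z0-9+/=]+)">')

def extract_images(prompt: str):
    # returns the PIL images embedded in a prompt/history string
    return [Image.open(io.BytesIO(base64.b64decode(m))) for m in IMG_RE.findall(prompt)]
```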
## LLM input
To describe the input, let's look at an example prompt:
```
text1<image1>text2<image2>text3
```
where `textN` is the N-th text part and `<imageN>` is the N-th image, in the HTML format specified above.
**The first step is to split the prompt into image/text parts**, so we get:
```
['text1', '<image1>', 'text2', '<image2>', 'text3']
```
This is done in the `MultimodalEmbedder._split_prompt(...)` function, which returns a list of `PromptPart`s - dataclasses wrapping the separate parts.
This function also appends the image start/end markers to text, which are provided by `AbstractMultimodalPipeline.image_start()` / `AbstractMultimodalPipeline.image_end()` functions. If image start is `<Img>`, and end is `</Img>`, this function will return:
```
['text1<Img>', '<image1>', '</Img>text2<Img>', '<image2>', '</Img>text3']
```
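A rough sketch of that splitting-and-marking step (the real logic lives in `MultimodalEmbedder._split_prompt(...)`; the regex and `<Img>`/`</Img>` markers are as described above):

```
import re

IMG_RE = r'(<img src="data:image/jpeg;base64,[A-Za-z0-9+/=]+">)'

def split_prompt(prompt, image_start='<Img>', image_end='</Img>'):
    parts = [p for p in re.split(IMG_RE, prompt) if p]
    out = []
    for i, part in enumerate(parts):
        if re.fullmatch(IMG_RE, part):
            out.append(part)  # image part, kept as-is
            continue
        # close a preceding image, open a following one
        prefix = image_end if i > 0 and re.fullmatch(IMG_RE, parts[i - 1]) else ''
        suffix = image_start if i + 1 < len(parts) and re.fullmatch(IMG_RE, parts[i + 1]) else ''
        out.append(prefix + part + suffix)
    return out
```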
**The returned prompt parts are then turned into token embeddings.**
First, they are converted to token IDs. For the text this is done using the standard `modules.text_generation.encode()` function; for the images, the token IDs are replaced with placeholders. A placeholder is a list of `N` copies of the placeholder token ID, where `N` is given by `AbstractMultimodalPipeline.num_image_embeds()` and the placeholder token ID by `AbstractMultimodalPipeline.placeholder_token_id()`.
Now, based on the token IDs, the prompt might get truncated, especially if `max_new_tokens` is unreasonably high. Unfortunately, this can't be done naively by just trimming the prompt until it is short enough: that would sometimes split the prompt in the middle of an image embedding, which usually breaks generation. Therefore, in that case the entire image needs to be removed from the input. This is done inside the `MultimodalEmbedder._encode_text(...)` function.
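A tiny sketch of the placeholder step (the names follow the abstract pipeline API quoted above):

```
def image_placeholder_ids(pipeline):
    # an image occupies a fixed-length run of fake token IDs in the tokenized
    # prompt; embed_images(...) later swaps the run for real embeddings, and
    # truncation that would split such a run drops the whole image instead
    return [pipeline.placeholder_token_id()] * pipeline.num_image_embeds()
```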
**After the tokenization, the tokens need to be embedded**; the text and images are once again treated separately.
The text parts are turned into embeddings using the `AbstractMultimodalPipeline.embed_tokens(...)` function. It uses the standard embedding function from the model, but to support many LLMs the actual function is returned by the pipeline (as it might differ between LLMs); for LLaMA it is `shared.model.model.embed_tokens(...)`.
The image parts are turned into embeddings using the `AbstractMultimodalPipeline.embed_images(...)` function. This function is specific to a given pipeline: it takes the images as input, forwards them through the vision model/projector, and returns the embeddings.
**Now the returned embeddings are stitched together** using `torch.cat()`, creating the final input to the LLM.
## Pipelines
All of the pipelines should subclass the `AbstractMultimodalPipeline` class. The idea is to allow new pipelines to be added in the same way as user extensions - git clone into `extensions/multimodal/pipelines`.
The pipelines are the description of the vision part, containing the vision model/multimodal projector. Each pipeline should have a unique `name()`, which the user then selects via the `--multimodal-pipeline` CLI argument. For an example, see `pipelines/llava/llava.py`.
## Pipeline modules
Pipelines are organized into "pipeline modules" - subdirectories in the `pipelines` directory. Each pipeline module should contain a file called `pipelines.py` with the following fields (a minimal sketch follows below):
- `available_pipelines: List[str]` - list of pipelines provided by this module, shown as the list of available pipelines to the user
- `def get_pipeline(name: str, params: dict) -> Optional[AbstractMultimodalPipeline]`: - a function to get a concrete pipeline by `name`; if `name` doesn't match any, it should return `None`. `params` is the user settings for the multimodal extension
- `def get_pipeline_from_model_name(model_name: str, params: dict) -> Optional[AbstractMultimodalPipeline]`: - a function to get a pipeline from `model_name`. It should be eager to return `None` unless the determination can be made clearly (for example: minigpt-4 is based on plain vicuna, so it should never return a pipeline from the model name alone, but llava can, as it has its own specific LLM finetune)
**NOTE**: A pipeline module should lazy-import its pipelines only when necessary, and it should keep its imports to a minimum.
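Putting the fields together, a minimal hypothetical `pipelines.py` could look like this (the pipeline name and module are made up for illustration):

```
from typing import Optional

available_pipelines = ['my-pipeline-13b']

def get_pipeline(name: str, params: dict):  # -> Optional[AbstractMultimodalPipeline]
    if name == 'my-pipeline-13b':
        from .my_pipeline import MyPipeline  # lazy import, per the note above
        return MyPipeline(params)
    return None

def get_pipeline_from_model_name(model_name: str, params: dict):  # -> Optional[AbstractMultimodalPipeline]
    if 'my-model' not in model_name.lower():
        return None  # be eager to return None unless the match is unambiguous
    from .my_pipeline import MyPipeline
    return MyPipeline(params)
```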
## Pipeline params
The pipelines will get the extension `params` in the constructor. They should honor the following fields:
- `vision_device` - string, specifying `torch.device` to run the vision model (CLIP/ViT) on
- `vision_bits` - int, number of fp bits to load the vision model(s) in
- `projector_device` - string, specifying `torch.device` to run the projector models (Linear layers, QFormer, etc.) on
- `projector_bits` - int, number of fp bits to load the projector models in
As helpers, `AbstractMultimodalPipeline` has `_get_device(self, setting_name: str, params: dict)` and `_get_dtype(self, setting_name: str, params: dict)` functions, which parse the string/int and return the corresponding `torch.device` / `torch.dtype`.
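For example, a settings fragment honoring those fields might look like this (the values are purely illustrative):

```
params = {
    'vision_device': 'cuda:0',     # where the vision model (CLIP/ViT) runs
    'vision_bits': 16,             # load the vision model in fp16
    'projector_device': 'cuda:0',  # where the projector layers run
    'projector_bits': 32,          # load the projector in fp32
}
```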

@ -0,0 +1,91 @@
# Multimodal
## Description
Adds support for multimodality (text+images) to text-generation-webui.
Note: multimodal currently only works with the transformers, AutoGPTQ, and GPTQ-for-LLaMa loaders. ExLlama (v1 and v2) and llama.cpp support is planned.
https://user-images.githubusercontent.com/3718215/233817203-69b57e77-0c55-4fd6-b742-3204bb13b8fc.mp4
## Usage
To run this extension, download an LLM that supports multimodality, and then start `server.py` with the appropriate `--multimodal-pipeline` argument. Examples:
```
# LLaVA 1.5 13B has the best performance
python server.py --model liuhaotian_llava-v1.5-13b --multimodal-pipeline llava-v1.5-13b --load-in-4bit
# LLaVA 1.5 7B is relatively weaker, but requires less memory
python server.py --model liuhaotian_llava-v1.5-7b --multimodal-pipeline llava-v1.5-7b --load-in-4bit
python server.py --model TheBloke_llava-v1.5-13B-GPTQ_gptq-4bit-32g-actorder_True --multimodal-pipeline llava-v1.5-13b --disable_exllama --loader autogptq
python server.py --model wojtab_llava-7b-v0-4bit-128g --multimodal-pipeline llava-7b
python server.py --model wojtab_llava-13b-v0-4bit-128g --multimodal-pipeline llava-13b
python server.py --model anon8231489123_vicuna-13b-GPTQ-4bit-128g --multimodal-pipeline minigpt4-13b
python server.py --model llama-7b-4bit --multimodal-pipeline minigpt4-7b
```
There is built-in support for LLaVA-v0-13B, LLaVA-v0-7B, and LLaVA-v1.5-13B. To install `minigpt4`:
- clone https://github.com/Wojtab/minigpt-4-pipeline into `extensions/multimodal/pipelines`
- install the requirements.txt
The same procedure should be used to install other pipelines, which can then be used with `--multimodal-pipeline [pipeline name]`. For additional multimodal pipelines, refer to the compatibility section below.
Do note that each image takes up a considerable number of tokens, so adjust `max_new_tokens` to be at most 1700 (the recommended value is between 200 and 500), so the images don't get truncated.
To send an image, just upload it to the extension field below the chat, and send a prompt as usual. The image will be added to the end of your message. If you wish to modify the placement, include the string `<image>` in your prompt.
Additionally, there is an *Embed all images, not only the last one* checkbox. It changes how image embeddings are handled: by default (when it's unchecked), all but the most recent image have their embeddings replaced with placeholders, so they are not fed to the network. Some multimodal networks appear to consider the features of all images at once, as if they were a single image, which is why the extension skips previous images by default. On other pipelines this can lead to sub-par generation; if you want to include all images, just tick this checkbox.
## Compatibility
As of now, the following multimodal pipelines are supported:
|Pipeline|`--multimodal-pipeline`|Default LLM|LLM info (for the linked model)|Pipeline repository|
|-|-|-|-|-|
|[LLaVA 13B](https://github.com/haotian-liu/LLaVA)|`llava-13b`|[LLaVA 13B](https://huggingface.co/wojtab/llava-13b-v0-4bit-128g)|GPTQ 4-bit quant, old CUDA|built-in|
|[LLaVA 7B](https://github.com/haotian-liu/LLaVA)|`llava-7b`|[LLaVA 7B](https://huggingface.co/wojtab/llava-7b-v0-4bit-128g)|GPTQ 4-bit quant, old CUDA|built-in|
|[MiniGPT-4 7B](https://github.com/Vision-CAIR/MiniGPT-4)|`minigpt4-7b`|[Vicuna v0 7B](https://huggingface.co/TheBloke/vicuna-7B-GPTQ-4bit-128g)|GPTQ 4-bit quant, new format|[Wojtab/minigpt-4-pipeline](https://github.com/Wojtab/minigpt-4-pipeline)|
|[MiniGPT-4 13B](https://github.com/Vision-CAIR/MiniGPT-4)|`minigpt4-13b`|[Vicuna v0 13B](https://huggingface.co/anon8231489123/vicuna-13b-GPTQ-4bit-128g)|GPTQ 4-bit quant, old CUDA|[Wojtab/minigpt-4-pipeline](https://github.com/Wojtab/minigpt-4-pipeline)|
|[InstructBLIP 7B](https://github.com/salesforce/LAVIS/tree/main/projects/instructblip)|`instructblip-7b`|[Vicuna v1.1 7B](https://huggingface.co/TheBloke/vicuna-7B-1.1-GPTQ-4bit-128g)|GPTQ 4-bit quant|[kjerk/instructblip-pipeline](https://github.com/kjerk/instructblip-pipeline)|
|[InstructBLIP 13B](https://github.com/salesforce/LAVIS/tree/main/projects/instructblip)|`instructblip-13b`|[Vicuna v1.1 13B](https://huggingface.co/TheBloke/vicuna-13B-1.1-GPTQ-4bit-128g)|GPTQ 4-bit quant|[kjerk/instructblip-pipeline](https://github.com/kjerk/instructblip-pipeline)|
Some pipelines could support different LLMs, but do note that while it might work, it isn't a supported configuration.
DO NOT report bugs if you are using a different LLM.
DO NOT report bugs with pipelines not included in this repository.
## Extension config
This extension uses the following parameters (from `settings.json`):
|Parameter|Description|
|---------|-----------|
|`multimodal-vision_bits`|Number of bits to load the vision model (CLIP/ViT) feature extractor in (most pipelines should support either 32 or 16, default=32)|
|`multimodal-vision_device`|Torch device to run the feature extractor on, for example, `cpu` or `cuda:0`, by default `cuda:0` if available|
|`multimodal-projector_bits`|Number of bits to load feature projector model(s) in (most pipelines should support either 32 or 16, default=32)|
|`multimodal-projector_device`|Torch device to run the feature projector model(s) on, for example `cpu` or `cuda:0`, by default `cuda:0` if available|
|`multimodal-add_all_images_to_prompt`|Default value of "Embed all images, not only the last one" checkbox|
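For example, a hypothetical `settings.json` fragment overriding these defaults:
```json
{
    "multimodal-vision_bits": 16,
    "multimodal-vision_device": "cuda:0",
    "multimodal-projector_bits": 16,
    "multimodal-projector_device": "cuda:0",
    "multimodal-add_all_images_to_prompt": false
}
```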
## Usage through API
You can run multimodal inference through the API by embedding images in the prompt. Images are embedded like so: `f'<img src="data:image/jpeg;base64,{img_str}">'`, where `img_str` is base64-encoded JPEG data. Note that you will need to launch `server.py` with the arguments `--api --extensions multimodal`.
Python example:
```Python
import base64
import requests
CONTEXT = "You are LLaVA, a large language and vision assistant trained by UW Madison WAIV Lab. You are able to understand the visual content that the user provides, and assist the user with a variety of tasks using natural language. Follow the instructions carefully and explain your answers in detail.### Human: Hi!### Assistant: Hi there! How can I help you today?\n"
with open('extreme_ironing.jpg', 'rb') as f:
img_str = base64.b64encode(f.read()).decode('utf-8')
prompt = CONTEXT + f'### Human: What is unusual about this image: \n<img src="data:image/jpeg;base64,{img_str}">### Assistant: '
print(requests.post('http://127.0.0.1:5000/api/v1/generate', json={'prompt': prompt, 'stopping_strings': ['\n###']}).json())
```
Script output:
```Python
{'results': [{'text': "The unusual aspect of this image is that a man is standing on top of a yellow minivan while doing his laundry. He has set up a makeshift clothes line using the car's rooftop as an outdoor drying area. This scene is uncommon because people typically do their laundry indoors, in a dedicated space like a laundromat or a room in their home, rather than on top of a moving vehicle. Additionally, hanging clothes on the car could be potentially hazardous or illegal in some jurisdictions due to the risk of damaging the vehicle or causing accidents on the road.\n##"}]}
```
## For pipeline developers/technical description
see [DOCS.md](https://github.com/oobabooga/text-generation-webui/blob/main/extensions/multimodal/DOCS.md)

@ -0,0 +1,63 @@
from abc import ABC, abstractmethod
from typing import List, Optional
import torch
from PIL import Image
from transformers import is_torch_xpu_available
class AbstractMultimodalPipeline(ABC):
@staticmethod
@abstractmethod
def name() -> str:
'name of the pipeline, should be same as in --multimodal-pipeline'
pass
@staticmethod
@abstractmethod
def image_start() -> Optional[str]:
'return image start string, string representation of image start token, or None if not applicable'
pass
@staticmethod
@abstractmethod
def image_end() -> Optional[str]:
'return image end string, string representation of image end token, or None if not applicable'
pass
@staticmethod
@abstractmethod
def placeholder_token_id() -> int:
'return placeholder token id'
pass
@staticmethod
@abstractmethod
def num_image_embeds() -> int:
'return the number of embeds used by a single image (for example: 256 for LLaVA)'
pass
@abstractmethod
def embed_images(self, images: List[Image.Image]) -> torch.Tensor:
'forward the images through vision pipeline, and return their embeddings'
pass
@staticmethod
@abstractmethod
def embed_tokens(input_ids: torch.Tensor) -> torch.Tensor:
'embed tokens, the exact function varies by LLM, for LLaMA it is `shared.model.model.embed_tokens`'
pass
@staticmethod
@abstractmethod
def placeholder_embeddings() -> torch.Tensor:
'get placeholder embeddings if there are multiple images, and `add_all_images_to_prompt` is False'
pass
def _get_device(self, setting_name: str, params: dict):
if params[setting_name] is None:
return torch.device("cuda:0" if torch.cuda.is_available() else "xpu:0" if is_torch_xpu_available() else "cpu")
return torch.device(params[setting_name])
def _get_dtype(self, setting_name: str, params: dict):
return torch.float32 if int(params[setting_name]) == 32 else torch.float16

@ -0,0 +1,178 @@
import base64
import re
from dataclasses import dataclass
from io import BytesIO
from typing import Any, List, Optional
import torch
from PIL import Image
from extensions.multimodal.pipeline_loader import load_pipeline
from modules import shared
from modules.logging_colors import logger
from modules.text_generation import encode, get_max_prompt_length
@dataclass
class PromptPart:
text: str
image: Optional[Image.Image] = None
is_image: bool = False
input_ids: Optional[torch.Tensor] = None
embedding: Optional[torch.Tensor] = None
class MultimodalEmbedder:
def __init__(self, params: dict):
pipeline, source = load_pipeline(params)
self.pipeline = pipeline
logger.info(f'Multimodal: loaded pipeline {self.pipeline.name()} from pipelines/{source} ({self.pipeline.__class__.__name__})')
def _split_prompt(self, prompt: str, load_images: bool = False) -> List[PromptPart]:
"""Splits a prompt into a list of `PromptParts` to separate image data from text.
It will also append `image_start` and `image_end` before and after the image, and optionally parse and load the images,
if `load_images` is `True`.
"""
parts: List[PromptPart] = []
curr = 0
while True:
match = re.search(r'<img src="data:image/jpeg;base64,([A-Za-z0-9+/=]+)">', prompt[curr:])
if match is None:
# no more image tokens, append the rest of the prompt
if curr > 0:
# add image end token after last image
parts.append(PromptPart(text=self.pipeline.image_end() + prompt[curr:]))
else:
parts.append(PromptPart(text=prompt))
break
# found an image, append image start token to the text
if match.start() > 0:
parts.append(PromptPart(text=prompt[curr:curr + match.start()] + self.pipeline.image_start()))
else:
parts.append(PromptPart(text=self.pipeline.image_start()))
# append the image
parts.append(PromptPart(
text=match.group(0),
image=Image.open(BytesIO(base64.b64decode(match.group(1)))) if load_images else None,
is_image=True
))
curr += match.end()
return parts
def _len_in_tokens_prompt_parts(self, parts: List[PromptPart]) -> int:
"""Total length in tokens of all `parts`"""
tokens = 0
for part in parts:
if part.is_image:
tokens += self.pipeline.num_image_embeds()
elif part.input_ids is not None:
tokens += len(part.input_ids)
else:
tokens += len(encode(part.text)[0])
return tokens
def len_in_tokens(self, prompt: str) -> int:
"""Total length in tokens for a given text `prompt`"""
parts = self._split_prompt(prompt, False)
return self._len_in_tokens_prompt_parts(parts)
def _encode_single_text(self, part: PromptPart, add_bos_token: bool) -> PromptPart:
"""Encode a single prompt `part` to `input_ids`. Returns a `PromptPart`"""
if part.is_image:
placeholders = torch.ones((self.pipeline.num_image_embeds())) * self.pipeline.placeholder_token_id()
part.input_ids = placeholders.to(shared.model.device, dtype=torch.int64)
else:
part.input_ids = encode(part.text, add_bos_token=add_bos_token)[0].to(shared.model.device, dtype=torch.int64)
return part
@staticmethod
def _num_images(parts: List[PromptPart]) -> int:
count = 0
for part in parts:
if part.is_image:
count += 1
return count
def _encode_text(self, state, parts: List[PromptPart]) -> List[PromptPart]:
"""Encode text to token_ids, also truncate the prompt, if necessary.
The chat/instruct mode should make prompts that fit in get_max_prompt_length, but if max_new_tokens are set
such that the context + min_rows don't fit, we can get a prompt which is too long.
We can't truncate image embeddings, as it leads to broken generation, so remove the images instead and warn the user
"""
encoded: List[PromptPart] = []
for i, part in enumerate(parts):
encoded.append(self._encode_single_text(part, i == 0 and state['add_bos_token']))
# truncation:
max_len = get_max_prompt_length(state)
removed_images = 0
# 1. remove entire text/image blocks
while self._len_in_tokens_prompt_parts(encoded[1:]) > max_len:
if encoded[0].is_image:
removed_images += 1
encoded = encoded[1:]
# 2. if the prompt is still too long, truncate the first remaining part (or drop it whole, if it's an image)
if self._len_in_tokens_prompt_parts(encoded) > max_len:
if encoded[0].is_image:
# don't truncate image embeddings, just remove the image, otherwise generation will be broken
removed_images += 1
encoded = encoded[1:]
elif len(encoded) > 1 and encoded[0].text.endswith(self.pipeline.image_start()):
# see if we can keep image_start token
len_image_start = len(encode(self.pipeline.image_start(), add_bos_token=state['add_bos_token'])[0])
if self._len_in_tokens_prompt_parts(encoded[1:]) + len_image_start > max_len:
# we can't -> remove this text, and the image
encoded = encoded[2:]
removed_images += 1
else:
# we can -> just truncate the text
trunc_len = self._len_in_tokens_prompt_parts(encoded) - max_len
encoded[0].input_ids = encoded[0].input_ids[trunc_len:]
elif len(encoded) > 0:
# only one text left, truncate it normally
trunc_len = self._len_in_tokens_prompt_parts(encoded) - max_len
encoded[0].input_ids = encoded[0].input_ids[trunc_len:]
# notify user if we truncated an image
if removed_images > 0:
logger.warning(f"Multimodal: removed {removed_images} image(s) from prompt. Try decreasing max_new_tokens if generation is broken")
return encoded
def _embed(self, parts: List[PromptPart]) -> List[PromptPart]:
# batch images
image_indices = [i for i, part in enumerate(parts) if part.is_image]
embedded = self.pipeline.embed_images([parts[i].image for i in image_indices])
for i, embeds in zip(image_indices, embedded):
parts[i].embedding = embeds
# embed text
for (i, part) in enumerate(parts):
if not part.is_image:
parts[i].embedding = self.pipeline.embed_tokens(part.input_ids)
return parts
def _remove_old_images(self, parts: List[PromptPart], params: dict) -> List[PromptPart]:
if params['add_all_images_to_prompt']:
return parts
already_added = False
for i, part in reversed(list(enumerate(parts))):
if part.is_image:
if already_added:
parts[i].embedding = self.pipeline.placeholder_embeddings()
else:
already_added = True
return parts
def forward(self, prompt: str, state: Any, params: dict):
prompt_parts = self._split_prompt(prompt, True)
prompt_parts = self._encode_text(state, prompt_parts)
prompt_parts = self._embed(prompt_parts)
prompt_parts = self._remove_old_images(prompt_parts, params)
embeds = tuple(part.embedding for part in prompt_parts)
ids = tuple(part.input_ids for part in prompt_parts)
input_embeds = torch.cat(embeds, dim=0)
input_ids = torch.cat(ids, dim=0)
return prompt, input_ids, input_embeds, self._num_images(prompt_parts)

@ -0,0 +1,52 @@
import traceback
from importlib import import_module
from pathlib import Path
from typing import Tuple
from extensions.multimodal.abstract_pipeline import AbstractMultimodalPipeline
from modules import shared
from modules.logging_colors import logger
def _get_available_pipeline_modules():
pipeline_path = Path(__file__).parent / 'pipelines'
modules = [p for p in pipeline_path.iterdir() if p.is_dir()]
return [m.name for m in modules if (m / 'pipelines.py').exists()]
def load_pipeline(params: dict) -> Tuple[AbstractMultimodalPipeline, str]:
pipeline_modules = {}
available_pipeline_modules = _get_available_pipeline_modules()
for name in available_pipeline_modules:
try:
pipeline_modules[name] = import_module(f'extensions.multimodal.pipelines.{name}.pipelines')
except Exception:
logger.warning(f'Failed to get multimodal pipelines from {name}')
logger.warning(traceback.format_exc())
if shared.args.multimodal_pipeline is not None:
for k in pipeline_modules:
if hasattr(pipeline_modules[k], 'get_pipeline'):
pipeline = getattr(pipeline_modules[k], 'get_pipeline')(shared.args.multimodal_pipeline, params)
if pipeline is not None:
return (pipeline, k)
else:
model_name = shared.args.model.lower()
for k in pipeline_modules:
if hasattr(pipeline_modules[k], 'get_pipeline_from_model_name'):
pipeline = getattr(pipeline_modules[k], 'get_pipeline_from_model_name')(model_name, params)
if pipeline is not None:
return (pipeline, k)
available = []
for k in pipeline_modules:
if hasattr(pipeline_modules[k], 'available_pipelines'):
pipelines = getattr(pipeline_modules[k], 'available_pipelines')
available += pipelines
if shared.args.multimodal_pipeline is not None:
log = f'Multimodal - ERROR: Failed to load multimodal pipeline "{shared.args.multimodal_pipeline}", available pipelines are: {available}.'
else:
log = f'Multimodal - ERROR: Failed to determine multimodal pipeline for model {shared.args.model}, please select one manually using --multimodal-pipeline [PIPELINE]. Available pipelines are: {available}.'
logger.critical(f'{log} Please specify a correct pipeline, or disable the extension')
raise RuntimeError(f'{log} Please specify a correct pipeline, or disable the extension')

@ -0,0 +1,9 @@
## LLaVA pipeline
This module provides 2 pipelines:
- `llava-7b` - for use with LLaVA v0 7B model (finetuned LLaMa 7B)
- `llava-13b` - for use with LLaVA v0 13B model (finetuned LLaMa 13B)
[LLaVA](https://github.com/haotian-liu/LLaVA) uses CLIP `openai/clip-vit-large-patch14` as the vision model, and then a single linear layer. For 13B the projector weights are in `liuhaotian/LLaVA-13b-delta-v0`, and for 7B they are in `liuhaotian/LLaVA-7b-delta-v0`.
The supported parameter combinations for both the vision model and the projector are: CUDA/32-bit, CUDA/16-bit, and CPU/32-bit.
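As a sketch of what that projector amounts to (shapes taken from the pipeline code below; the tensors are stand-ins):
```python
import torch

# 256 CLIP patch features of size 1024 per image, projected into the LLM's hidden size
mm_projector_13b = torch.nn.Linear(1024, 5120)  # 7B uses Linear(1024, 4096)
image_features = torch.randn(1, 256, 1024)
print(mm_projector_13b(image_features).shape)   # torch.Size([1, 256, 5120])
```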

@ -0,0 +1,262 @@
import time
from abc import abstractmethod
from typing import List, Tuple
import torch
from huggingface_hub import hf_hub_download
from PIL import Image
from transformers import CLIPImageProcessor, CLIPVisionModel
from extensions.multimodal.abstract_pipeline import AbstractMultimodalPipeline
from modules import shared
from modules.logging_colors import logger
from modules.text_generation import encode
def expand2square(pil_img: Image.Image, background_color: Tuple[int]) -> Image.Image:
width, height = pil_img.size
if width == height:
return pil_img
elif width > height:
result = Image.new(pil_img.mode, (width, width), background_color)
result.paste(pil_img, (0, (width - height) // 2))
return result
else:
result = Image.new(pil_img.mode, (height, height), background_color)
result.paste(pil_img, ((height - width) // 2, 0))
return result
class LLaVA_v0_Pipeline(AbstractMultimodalPipeline):
CLIP_REPO = "openai/clip-vit-large-patch14"
def __init__(self, params: dict) -> None:
super().__init__()
self.clip_device = self._get_device("vision_device", params)
self.clip_dtype = self._get_dtype("vision_bits", params)
self.projector_device = self._get_device("projector_device", params)
self.projector_dtype = self._get_dtype("projector_bits", params)
self.image_processor, self.vision_tower, self.mm_projector = self._load_models()
def _load_models(self):
start_ts = time.time()
logger.info(f"LLaVA - Loading CLIP from {self.CLIP_REPO} as {self.clip_dtype} on {self.clip_device}...")
image_processor = CLIPImageProcessor.from_pretrained(self.CLIP_REPO, torch_dtype=self.clip_dtype)
vision_tower = CLIPVisionModel.from_pretrained(self.CLIP_REPO, torch_dtype=self.clip_dtype).to(self.clip_device)
logger.info(f"LLaVA - Loading projector from {self.llava_projector_repo()} as {self.projector_dtype} on {self.projector_device}...")
projector_path = hf_hub_download(self.llava_projector_repo(), self.llava_projector_filename())
mm_projector = self.build_mm_projector()
projector_data = torch.load(projector_path)
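# keep only the projector weights, stripping the 'model.mm_projector.' prefix (19 characters)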
projector_data = {k[19:]: v for k, v in projector_data.items() if k.startswith('model.mm_projector.')}
mm_projector.load_state_dict(projector_data)
mm_projector = mm_projector.to(self.projector_device)
logger.info(f"LLaVA supporting models loaded, took {time.time() - start_ts:.2f} seconds")
return image_processor, vision_tower, mm_projector
def build_mm_projector(self) -> torch.nn.Module:
projector_shape = self.llava_projector_shape()
if len(projector_shape) == 2:
return torch.nn.Linear(*projector_shape)
else:
modules = []
modules.append(torch.nn.Linear(projector_shape[0], projector_shape[1]))
for i in range(2, len(projector_shape)):
modules.append(torch.nn.GELU())
modules.append(torch.nn.Linear(projector_shape[i-1], projector_shape[i]))
return torch.nn.Sequential(*modules)
@staticmethod
def image_start() -> str:
return "<im_start>"
@staticmethod
def image_end() -> str:
return "<im_end>"
@staticmethod
def num_image_embeds() -> int:
return 256
@staticmethod
def embed_tokens(input_ids: torch.Tensor) -> torch.Tensor:
for attr in ['', 'model', 'model.model', 'model.model.model']:
tmp = getattr(shared.model, attr, None) if attr != '' else shared.model
if tmp is not None and hasattr(tmp, 'embed_tokens'):
func = tmp.embed_tokens
break
else:
raise ValueError('The embed_tokens method has not been found for this loader.')
return func(input_ids).to(shared.model.device, dtype=shared.model.dtype)
@staticmethod
def placeholder_embeddings() -> torch.Tensor:
return LLaVA_v0_Pipeline.embed_tokens(encode("<im_patch>"*256, add_bos_token=False)[0])
def embed_images(self, images: List[Image.Image]) -> torch.Tensor:
images = self.image_processor(images, return_tensors='pt')['pixel_values']
images = images.to(self.clip_device, dtype=self.clip_dtype)
with torch.no_grad():
image_forward_outs = self.vision_tower(images, output_hidden_states=True)
select_hidden_state_layer = -2
select_hidden_state = image_forward_outs.hidden_states[select_hidden_state_layer]
image_features = select_hidden_state[:, 1:].to(self.projector_device, dtype=self.projector_dtype)
image_features = self.mm_projector(image_features)
return image_features.to(shared.model.device, dtype=shared.model.dtype)
@staticmethod
@abstractmethod
def llava_projector_repo() -> str:
pass
@staticmethod
@abstractmethod
def llava_projector_filename() -> str:
pass
@staticmethod
@abstractmethod
def llava_projector_shape() -> Tuple[int, int]:
pass
class LLaVA_v0_13B_Pipeline(LLaVA_v0_Pipeline):
def __init__(self, params: dict) -> None:
super().__init__(params)
@staticmethod
def name() -> str:
return "llava-13b"
@staticmethod
def placeholder_token_id() -> int:
return 32000
@staticmethod
def llava_projector_shape() -> Tuple[int, int]:
return (1024, 5120)
@staticmethod
def llava_projector_filename() -> str:
return "mm_projector.bin"
@staticmethod
def llava_projector_repo() -> str:
return "liuhaotian/LLaVA-13b-delta-v0"
class LLaVA_v0_7B_Pipeline(LLaVA_v0_Pipeline):
def __init__(self, params: dict) -> None:
super().__init__(params)
@staticmethod
def name() -> str:
return "llava-7b"
@staticmethod
def placeholder_token_id() -> int:
return 32001
@staticmethod
def llava_projector_shape() -> Tuple[int, int]:
return (1024, 4096)
@staticmethod
def llava_projector_filename() -> str:
return "mm_projector.bin"
@staticmethod
def llava_projector_repo() -> str:
return "liuhaotian/LLaVA-7b-delta-v0"
class LLaVA_LLaMA_2_13B_Pipeline(LLaVA_v0_13B_Pipeline):
def __init__(self, params: dict) -> None:
super().__init__(params)
@staticmethod
def name() -> str:
return "llava-llama-2-13b"
@staticmethod
def placeholder_token_id() -> int:
return 0
@staticmethod
def llava_projector_repo() -> str:
return "liuhaotian/llava-llama-2-13b-chat-lightning-preview"
@staticmethod
def image_start() -> str:
return ""
@staticmethod
def image_end() -> str:
return ""
@staticmethod
def placeholder_embeddings() -> torch.Tensor:
return LLaVA_v0_Pipeline.embed_tokens(encode("<unk>"*256, add_bos_token=False)[0])
class LLaVA_v1_5_13B_Pipeline(LLaVA_v0_13B_Pipeline):
CLIP_REPO = "openai/clip-vit-large-patch14-336"
def __init__(self, params: dict) -> None:
super().__init__(params)
@staticmethod
def name() -> str:
return "llava-v1.5-13b"
@staticmethod
def llava_projector_shape() -> Tuple[int, int]:
return (1024, 5120, 5120)
@staticmethod
def placeholder_token_id() -> int:
return 0
@staticmethod
def llava_projector_repo() -> str:
return "liuhaotian/llava-v1.5-13b"
@staticmethod
def image_start() -> str:
return ""
@staticmethod
def image_end() -> str:
return ""
@staticmethod
def num_image_embeds() -> int:
return 576
def embed_images(self, images: List[Image.Image]) -> torch.Tensor:
# pad it to square first
images = [
expand2square(image, tuple(int(x*255) for x in self.image_processor.image_mean))
for image in images
]
return super().embed_images(images)
@staticmethod
def placeholder_embeddings() -> torch.Tensor:
return LLaVA_v0_Pipeline.embed_tokens(encode("<unk>"*576, add_bos_token=False)[0])
class LLaVA_v1_5_7B_Pipeline(LLaVA_v1_5_13B_Pipeline):
@staticmethod
def name() -> str:
return "llava-v1.5-7b"
@staticmethod
def llava_projector_shape() -> Tuple[int, int]:
return (1024, 4096, 4096)
@staticmethod
def llava_projector_repo() -> str:
return "liuhaotian/llava-v1.5-7b"

@ -0,0 +1,48 @@
from typing import Optional
from extensions.multimodal.abstract_pipeline import AbstractMultimodalPipeline
available_pipelines = ['llava-7b', 'llava-13b', 'llava-llama-2-13b', 'llava-v1.5-13b', 'llava-v1.5-7b']
def get_pipeline(name: str, params: dict) -> Optional[AbstractMultimodalPipeline]:
if name == 'llava-7b':
from .llava import LLaVA_v0_7B_Pipeline
return LLaVA_v0_7B_Pipeline(params)
if name == 'llava-13b':
from .llava import LLaVA_v0_13B_Pipeline
return LLaVA_v0_13B_Pipeline(params)
if name == 'llava-llama-2-13b':
from .llava import LLaVA_LLaMA_2_13B_Pipeline
return LLaVA_LLaMA_2_13B_Pipeline(params)
if name == 'llava-v1.5-7b':
from .llava import LLaVA_v1_5_7B_Pipeline
return LLaVA_v1_5_7B_Pipeline(params)
if name == 'llava-v1.5-13b':
from .llava import LLaVA_v1_5_13B_Pipeline
return LLaVA_v1_5_13B_Pipeline(params)
return None
def get_pipeline_from_model_name(model_name: str, params: dict) -> Optional[AbstractMultimodalPipeline]:
if 'llava' not in model_name.lower():
return None
if 'llama-2' in model_name.lower():
if '13b' in model_name.lower():
from .llava import LLaVA_LLaMA_2_13B_Pipeline
return LLaVA_LLaMA_2_13B_Pipeline(params)
elif 'llava-v1.5' in model_name.lower():
if '13b' in model_name.lower():
from .llava import LLaVA_v1_5_13B_Pipeline
return LLaVA_v1_5_13B_Pipeline(params)
if '7b' in model_name.lower():
from .llava import LLaVA_v1_5_7B_Pipeline
return LLaVA_v1_5_7B_Pipeline(params)
else:
if '7b' in model_name.lower():
from .llava import LLaVA_v0_7B_Pipeline
return LLaVA_v0_7B_Pipeline(params)
if '13b' in model_name.lower():
from .llava import LLaVA_v0_13B_Pipeline
return LLaVA_v0_13B_Pipeline(params)
return None

@ -0,0 +1,113 @@
import base64
import re
import time
from functools import partial
from io import BytesIO
import gradio as gr
import torch
from extensions.multimodal.multimodal_embedder import MultimodalEmbedder
from modules import shared
from modules.logging_colors import logger
params = {
"add_all_images_to_prompt": False,
# device to run vision encoder on
"vision_device": None,
# bits to load vision encoder in, either 16 or 32
"vision_bits": 32,
# device to run multimodal projector on
"projector_device": None,
# multimodal projector bits, either 32 or 16
"projector_bits": 32
}
# If 'state' is True, will hijack the next chat generation
input_hijack = {
'state': False,
'value': ["", ""]
}
# initialized in ui, so that params are loaded from settings
multimodal_embedder: MultimodalEmbedder = None
def chat_input_modifier(text, visible_text, state):
global input_hijack
if input_hijack['state']:
input_hijack['state'] = False
return input_hijack['value'](text, visible_text)
else:
return text, visible_text
def add_chat_picture(picture, text, visible_text):
# resize the image so that its shortest edge becomes 336 while keeping the aspect ratio
# (336 is the CLIP input size for llava-v1.5; earlier pipelines used 224, capped at 300 to keep history manageable)
max_hw, min_hw = max(picture.size), min(picture.size)
aspect_ratio = max_hw / min_hw
shortest_edge = int(max(336 / aspect_ratio, 336))
longest_edge = int(shortest_edge * aspect_ratio)
w = shortest_edge if picture.width < picture.height else longest_edge
h = shortest_edge if picture.width >= picture.height else longest_edge
picture = picture.resize((w, h))
buffer = BytesIO()
picture.save(buffer, format="PNG")
img_str = base64.b64encode(buffer.getvalue()).decode('utf-8')
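# note: the picture is saved as PNG but labeled 'jpeg' in the data URI below; the embedder only base64-decodes the payload, so PIL still detects the real format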
image = f'<img src="data:image/jpeg;base64,{img_str}">'
if '<image>' in text:
text = text.replace('<image>', image)
else:
text = image + '\n' + text
if visible_text == '' or visible_text is None:
visible_text = text
elif '<image>' in visible_text:
visible_text = visible_text.replace('<image>', image)
else:
visible_text = visible_text + '\n' + image
return text, visible_text
def custom_tokenized_length(prompt):
return multimodal_embedder.len_in_tokens(prompt)
def tokenizer_modifier(state, prompt, input_ids, input_embeds):
global params
start_ts = time.time()
image_match = re.search(r'<img src="data:image/jpeg;base64,[A-Za-z0-9+/=]+">', prompt)
if image_match is None:
return prompt, input_ids, input_embeds
prompt, input_ids, input_embeds, total_embedded = multimodal_embedder.forward(prompt, state, params)
logger.info(f'Embedded {total_embedded} image(s) in {time.time()-start_ts:.2f}s')
return (prompt,
input_ids.unsqueeze(0).to(shared.model.device, dtype=torch.int64),
input_embeds.unsqueeze(0).to(shared.model.device, dtype=shared.model.dtype))
def ui():
global multimodal_embedder
multimodal_embedder = MultimodalEmbedder(params)
with gr.Column():
picture_select = gr.Image(label='Send a picture', type='pil')
# The models don't seem to deal well with multiple images
single_image_checkbox = gr.Checkbox(False, label='Embed all images, not only the last one')
# Prepare the input hijack
picture_select.upload(
lambda picture: input_hijack.update({"state": True, "value": partial(add_chat_picture, picture)}),
[picture_select],
None
)
picture_select.clear(lambda: input_hijack.update({"state": False, "value": ["", ""]}), None, None)
single_image_checkbox.change(lambda x: params.update({"add_all_images_to_prompt": x}), single_image_checkbox, None)
shared.gradio['Generate'].click(lambda: None, None, picture_select)
shared.gradio['textbox'].submit(lambda: None, None, picture_select)

@ -0,0 +1,69 @@
# Adding an ingress URL through the ngrok Agent SDK for Python
[ngrok](https://ngrok.com) is a globally distributed reverse proxy commonly used for quickly getting a public URL to a
service running inside a private network, such as on your local laptop. The ngrok agent is usually
deployed inside a private network and is used to communicate with the ngrok cloud service.
By default, the authtoken in the `NGROK_AUTHTOKEN` environment variable will be used. Alternatively, one may be specified in
the `settings.json` file; see the Examples below. Retrieve your authtoken on the [Auth Token page of your ngrok dashboard](https://dashboard.ngrok.com/get-started/your-authtoken) - signing up is free.
# Documentation
For a list of all available options, see [the configuration documentation](https://ngrok.com/docs/ngrok-agent/config/) or [the connect example](https://github.com/ngrok/ngrok-py/blob/main/examples/ngrok-connect-full.py).
The ngrok Python SDK is [on github here](https://github.com/ngrok/ngrok-py). A quickstart guide and a full API reference are included in the [ngrok-py Python API documentation](https://ngrok.github.io/ngrok-py/).
# Running
To enable ngrok, install the requirements and then add `--extension ngrok` to the command-line options, for instance:
```bash
pip install -r extensions/ngrok/requirements.txt
python server.py --extension ngrok
```
In the output you should then see something like this:
```bash
INFO:Loading the extension "ngrok"...
INFO:Session created
INFO:Created tunnel "9d9d0944dc75ff9d3aae653e5eb29fe9" with url "https://d83706cf7be7.ngrok.app"
INFO:Tunnel "9d9d0944dc75ff9d3aae653e5eb29fe9" TCP forwarding to "localhost:7860"
INFO:Ingress established at https://d83706cf7be7.ngrok.app
```
You can now access the webui via the URL shown, in this case `https://d83706cf7be7.ngrok.app`. It is recommended to add some authentication to the ingress; see below.
# Example Settings
In `settings.json`, add an `ngrok` key with a dictionary of options, for instance:
To enable basic authentication:
```json
{
"ngrok": {
"basic_auth": "user:password"
}
}
```
To enable OAUTH authentication:
```json
{
"ngrok": {
"oauth_provider": "google",
"oauth_allow_domains": "asdf.com",
"oauth_allow_emails": "asdf@asdf.com"
}
}
```
To add an authtoken instead of using the NGROK_AUTHTOKEN environment variable:
```json
{
"ngrok": {
"authtoken": "<token>",
"authtoken_from_env":false
}
}
```

@ -0,0 +1,36 @@
# Adds ngrok ingress, to use add `--extension ngrok` to the command line options
#
# Parameters can be customized in settings.json of webui, e.g.:
# {"ngrok": {"basic_auth":"user:password"} }
# or
# {"ngrok": {"oauth_provider":"google", "oauth_allow_emails":["asdf@asdf.com"]} }
#
# See this example for full list of options: https://github.com/ngrok/ngrok-py/blob/main/examples/ngrok-connect-full.py
# or the README.md in this directory.
import logging
from modules import shared
# Pick up host/port command line arguments
host = shared.args.listen_host if shared.args.listen_host and shared.args.listen else '127.0.0.1'
port = shared.args.listen_port if shared.args.listen_port else '7860'
# Default options
options = {
'addr': f"{host}:{port}",
'authtoken_from_env': True,
'session_metadata': 'text-generation-webui',
}
def ui():
settings = shared.settings.get("ngrok")
if settings:
options.update(settings)
try:
import ngrok
tunnel = ngrok.connect(**options)
logging.info(f"Ingress established at: {tunnel.url()}")
except ModuleNotFoundError:
logging.error("===> ngrok library not found, please run `pip install -r extensions/ngrok/requirements.txt`")

@ -0,0 +1,11 @@
#!/usr/bin/env python3
# preload the embedding model, useful for Docker images to prevent re-download on config change
# Dockerfile:
# ENV OPENEDAI_EMBEDDING_MODEL="sentence-transformers/all-mpnet-base-v2" # Optional
# RUN python3 cache_embedded_model.py
import os
import sentence_transformers
st_model = os.environ.get("OPENEDAI_EMBEDDING_MODEL", "sentence-transformers/all-mpnet-base-v2")
model = sentence_transformers.SentenceTransformer(st_model)

@ -0,0 +1,507 @@
import copy
import time
from collections import deque
import tiktoken
import torch
import torch.nn.functional as F
from transformers import LogitsProcessor, LogitsProcessorList
from extensions.openai.errors import InvalidRequestError
from extensions.openai.utils import debug_msg
from modules import shared
from modules.chat import (
generate_chat_prompt,
generate_chat_reply,
load_character_memoized
)
from modules.presets import load_preset_memoized
from modules.text_generation import decode, encode, generate_reply
class LogitsBiasProcessor(LogitsProcessor):
def __init__(self, logit_bias={}):
self.logit_bias = logit_bias
if self.logit_bias:
self.keys = [int(key) for key in self.logit_bias.keys()]
values = [self.logit_bias[str(key)] for key in self.keys]
self.values = torch.tensor(values, dtype=torch.float, device=shared.model.device)
debug_msg(f"{self}")
def __call__(self, input_ids: torch.LongTensor, logits: torch.FloatTensor) -> torch.FloatTensor:
if self.logit_bias:
debug_msg(logits[0, self.keys], " + ", self.values)
logits[0, self.keys] += self.values
debug_msg(" --> ", logits[0, self.keys])
debug_msg(" max/min ", float(torch.max(logits[0])), float(torch.min(logits[0])))
return logits
def __repr__(self):
return f"<{self.__class__.__name__}(logit_bias={self.logit_bias})>"
class LogprobProcessor(LogitsProcessor):
def __init__(self, logprobs=None):
self.logprobs = logprobs
self.token_alternatives = {}
def __call__(self, input_ids: torch.LongTensor, logits: torch.FloatTensor) -> torch.FloatTensor:
if self.logprobs is not None: # 0-5
log_e_probabilities = F.log_softmax(logits, dim=1)
top_values, top_indices = torch.topk(log_e_probabilities, k=self.logprobs + 1)
top_tokens = [decode(tok) for tok in top_indices[0]]
top_probs = [float(x) for x in top_values[0]]
self.token_alternatives = dict(zip(top_tokens, top_probs))
debug_msg(repr(self))
return logits
def __repr__(self):
return f"<{self.__class__.__name__}(logprobs={self.logprobs}, token_alternatives={self.token_alternatives})>"
def convert_logprobs_to_tiktoken(model, logprobs):
# more problems than it's worth.
# try:
# encoder = tiktoken.encoding_for_model(model)
# # just pick the first one if it encodes to multiple tokens... 99.9% not required and maybe worse overall.
# return dict([(encoder.decode([encoder.encode(token)[0]]), prob) for token, prob in logprobs.items()])
# except KeyError:
# # assume native tokens if we can't find the tokenizer
# return logprobs
return logprobs
def process_parameters(body, is_legacy=False):
generate_params = body
max_tokens_str = 'length' if is_legacy else 'max_tokens'
generate_params['max_new_tokens'] = body.pop(max_tokens_str)
if generate_params['truncation_length'] == 0:
generate_params['truncation_length'] = shared.settings['truncation_length']
if body['preset'] is not None:
preset = load_preset_memoized(body['preset'])
generate_params.update(preset)
generate_params['custom_stopping_strings'] = []
if 'stop' in body: # str or array, max len 4 (ignored)
if isinstance(body['stop'], str):
generate_params['custom_stopping_strings'] = [body['stop']]
elif isinstance(body['stop'], list):
generate_params['custom_stopping_strings'] = body['stop']
logits_processor = []
logit_bias = body.get('logit_bias', None)
if logit_bias: # {str: float, ...}
# XXX convert tokens from tiktoken based on requested model
# Ex.: 'logit_bias': {'1129': 100, '11442': 100, '16243': 100}
try:
encoder = tiktoken.encoding_for_model(generate_params['model'])
new_logit_bias = {}
for logit, bias in logit_bias.items():
for x in encode(encoder.decode([int(logit)]), add_special_tokens=False)[0]:
if int(x) in [0, 1, 2, 29871]: # XXX LLAMA tokens
continue
new_logit_bias[str(int(x))] = bias
debug_msg('logit_bias_map', logit_bias, '->', new_logit_bias)
logit_bias = new_logit_bias
except KeyError:
pass # assume native tokens if we can't find the tokenizer
logits_processor = [LogitsBiasProcessor(logit_bias)]
logprobs = None # coming to chat eventually
if 'logprobs' in body:
logprobs = body.get('logprobs', 0) # maybe cap at topk? don't clamp 0-5.
generate_params['logprob_proc'] = LogprobProcessor(logprobs)
logits_processor.extend([generate_params['logprob_proc']])
else:
logprobs = None
if logits_processor: # requires logits_processor support
generate_params['logits_processor'] = LogitsProcessorList(logits_processor)
return generate_params
def convert_history(history):
'''
Chat histories in this program are in the format [message, reply].
This function converts OpenAI histories to that format.
'''
chat_dialogue = []
current_message = ""
current_reply = ""
user_input = ""
system_message = ""
for entry in history:
content = entry["content"]
role = entry["role"]
if role == "user":
user_input = content
if current_message:
chat_dialogue.append([current_message, ''])
current_message = ""
current_message = content
elif role == "assistant":
current_reply = content
if current_message:
chat_dialogue.append([current_message, current_reply])
current_message = ""
current_reply = ""
else:
chat_dialogue.append(['', current_reply])
elif role == "system":
system_message = content
# if current_message:
# chat_dialogue.append([current_message, ''])
return user_input, system_message, {'internal': chat_dialogue, 'visible': copy.deepcopy(chat_dialogue)}
def chat_completions_common(body: dict, is_legacy: bool = False, stream=False) -> dict:
if body.get('functions', []):
raise InvalidRequestError(message="functions is not supported.", param='functions')
if body.get('function_call', ''):
raise InvalidRequestError(message="function_call is not supported.", param='function_call')
if 'messages' not in body:
raise InvalidRequestError(message="messages is required", param='messages')
messages = body['messages']
for m in messages:
if 'role' not in m:
raise InvalidRequestError(message="messages: missing role", param='messages')
elif m['role'] == 'function':
raise InvalidRequestError(message="role: function is not supported.", param='messages')
if 'content' not in m:
raise InvalidRequestError(message="messages: missing content", param='messages')
# Chat Completions
object_type = 'chat.completions' if not stream else 'chat.completions.chunk'
created_time = int(time.time())
cmpl_id = "chatcmpl-%d" % (int(time.time() * 1000000000))
resp_list = 'data' if is_legacy else 'choices'
# generation parameters
generate_params = process_parameters(body, is_legacy=is_legacy)
continue_ = body['continue_']
# Instruction template
instruction_template = body['instruction_template'] or shared.settings['instruction_template']
instruction_template = "Alpaca" if instruction_template == "None" else instruction_template
name1_instruct, name2_instruct, _, _, context_instruct, turn_template, system_message = load_character_memoized(instruction_template, '', '', instruct=True)
name1_instruct = body['name1_instruct'] or name1_instruct
name2_instruct = body['name2_instruct'] or name2_instruct
turn_template = body['turn_template'] or turn_template
context_instruct = body['context_instruct'] or context_instruct
system_message = body['system_message'] or system_message
# Chat character
character = body['character'] or shared.settings['character']
character = "Assistant" if character == "None" else character
name1 = body['name1'] or shared.settings['name1']
name1, name2, _, greeting, context, _, _ = load_character_memoized(character, name1, '', instruct=False)
name2 = body['name2'] or name2
context = body['context'] or context
greeting = body['greeting'] or greeting
# History
user_input, custom_system_message, history = convert_history(messages)
generate_params.update({
'mode': body['mode'],
'name1': name1,
'name2': name2,
'context': context,
'greeting': greeting,
'name1_instruct': name1_instruct,
'name2_instruct': name2_instruct,
'context_instruct': context_instruct,
'system_message': system_message,
'custom_system_message': custom_system_message,
'turn_template': turn_template,
'chat-instruct_command': body['chat_instruct_command'],
'history': history,
'stream': stream
})
max_tokens = generate_params['max_new_tokens']
if max_tokens in [None, 0]:
generate_params['max_new_tokens'] = 200
generate_params['auto_max_new_tokens'] = True
requested_model = generate_params.pop('model')
logprob_proc = generate_params.pop('logprob_proc', None)
def chat_streaming_chunk(content):
# begin streaming
chunk = {
"id": cmpl_id,
"object": object_type,
"created": created_time,
"model": shared.model_name,
resp_list: [{
"index": 0,
"finish_reason": None,
# So yeah... do both methods? delta and messages.
"message": {'role': 'assistant', 'content': content},
"delta": {'role': 'assistant', 'content': content},
}],
}
if logprob_proc: # not official for chat yet
top_logprobs = convert_logprobs_to_tiktoken(model=requested_model, logprobs=logprob_proc.token_alternatives)
chunk[resp_list][0]["logprobs"] = {'top_logprobs': [top_logprobs]}
# else:
# chunk[resp_list][0]["logprobs"] = None
return chunk
if stream:
yield chat_streaming_chunk('')
# generate reply #######################################
prompt = generate_chat_prompt(user_input, generate_params)
token_count = len(encode(prompt)[0])
debug_msg({'prompt': prompt, 'generate_params': generate_params})
generator = generate_chat_reply(
user_input, generate_params, regenerate=False, _continue=continue_, loading_message=False)
answer = ''
seen_content = ''
completion_token_count = 0
for a in generator:
answer = a['internal'][-1][1]
if stream:
len_seen = len(seen_content)
new_content = answer[len_seen:]
if not new_content or chr(0xfffd) in new_content: # partial unicode character, don't send it yet.
continue
seen_content = answer
chunk = chat_streaming_chunk(new_content)
yield chunk
completion_token_count = len(encode(answer)[0])
stop_reason = "stop"
if token_count + completion_token_count >= generate_params['truncation_length'] or completion_token_count >= generate_params['max_new_tokens']:
stop_reason = "length"
if stream:
chunk = chat_streaming_chunk('')
chunk[resp_list][0]['finish_reason'] = stop_reason
chunk['usage'] = {
"prompt_tokens": token_count,
"completion_tokens": completion_token_count,
"total_tokens": token_count + completion_token_count
}
yield chunk
else:
resp = {
"id": cmpl_id,
"object": object_type,
"created": created_time,
"model": shared.model_name,
resp_list: [{
"index": 0,
"finish_reason": stop_reason,
"message": {"role": "assistant", "content": answer}
}],
"usage": {
"prompt_tokens": token_count,
"completion_tokens": completion_token_count,
"total_tokens": token_count + completion_token_count
}
}
if logprob_proc: # not official for chat yet
top_logprobs = convert_logprobs_to_tiktoken(model=requested_model, logprobs=logprob_proc.token_alternatives)
resp[resp_list][0]["logprobs"] = {'top_logprobs': [top_logprobs]}
# else:
# resp[resp_list][0]["logprobs"] = None
yield resp
def completions_common(body: dict, is_legacy: bool = False, stream=False):
object_type = 'text_completion.chunk' if stream else 'text_completion'
created_time = int(time.time())
cmpl_id = "conv-%d" % (int(time.time() * 1000000000))
resp_list = 'data' if is_legacy else 'choices'
prompt_str = 'context' if is_legacy else 'prompt'
# ... encoded as a string, array of strings, array of tokens, or array of token arrays.
if prompt_str not in body:
raise InvalidRequestError("Missing required input", param=prompt_str)
# common params
generate_params = process_parameters(body, is_legacy=is_legacy)
max_tokens = generate_params['max_new_tokens']
generate_params['stream'] = stream
requested_model = generate_params.pop('model')
logprob_proc = generate_params.pop('logprob_proc', None)
suffix = body['suffix'] if body['suffix'] else ''
echo = body['echo']
if not stream:
prompt_arg = body[prompt_str]
if isinstance(prompt_arg, str) or (isinstance(prompt_arg, list) and isinstance(prompt_arg[0], int)):
prompt_arg = [prompt_arg]
resp_list_data = []
total_completion_token_count = 0
total_prompt_token_count = 0
for idx, prompt in enumerate(prompt_arg, start=0):
if isinstance(prompt[0], int):
# token lists
if requested_model == shared.model_name:
prompt = decode(prompt)  # decode() returns a string; indexing [0] would keep only the first character
else:
try:
encoder = tiktoken.encoding_for_model(requested_model)
prompt = encoder.decode(prompt)
except KeyError:
prompt = decode(prompt)
prefix = prompt if echo else ''
token_count = len(encode(prompt)[0])
total_prompt_token_count += token_count
# generate reply #######################################
debug_msg({'prompt': prompt, 'generate_params': generate_params})
generator = generate_reply(prompt, generate_params, is_chat=False)
answer = ''
for a in generator:
answer = a
completion_token_count = len(encode(answer)[0])
total_completion_token_count += completion_token_count
stop_reason = "stop"
if token_count + completion_token_count >= generate_params['truncation_length'] or completion_token_count >= max_tokens:
stop_reason = "length"
respi = {
"index": idx,
"finish_reason": stop_reason,
"text": prefix + answer + suffix,
"logprobs": {'top_logprobs': [logprob_proc.token_alternatives]} if logprob_proc else None,
}
resp_list_data.extend([respi])
resp = {
"id": cmpl_id,
"object": object_type,
"created": created_time,
"model": shared.model_name,
resp_list: resp_list_data,
"usage": {
"prompt_tokens": total_prompt_token_count,
"completion_tokens": total_completion_token_count,
"total_tokens": total_prompt_token_count + total_completion_token_count
}
}
yield resp
else:
prompt = body[prompt_str]
if isinstance(prompt, list):
if prompt and isinstance(prompt[0], int):
try:
encoder = tiktoken.encoding_for_model(requested_model)
prompt = encoder.decode(prompt)
except KeyError:
prompt = decode(prompt)
else:
raise InvalidRequestError(message="API Batched generation not yet supported.", param=prompt_str)
prefix = prompt if echo else ''
token_count = len(encode(prompt)[0])
def text_streaming_chunk(content):
# begin streaming
chunk = {
"id": cmpl_id,
"object": object_type,
"created": created_time,
"model": shared.model_name,
resp_list: [{
"index": 0,
"finish_reason": None,
"text": content,
"logprobs": {'top_logprobs': [logprob_proc.token_alternatives]} if logprob_proc else None,
}],
}
return chunk
yield text_streaming_chunk(prefix)
# generate reply #######################################
debug_msg({'prompt': prompt, 'generate_params': generate_params})
generator = generate_reply(prompt, generate_params, is_chat=False)
answer = ''
seen_content = ''
completion_token_count = 0
for a in generator:
answer = a
len_seen = len(seen_content)
new_content = answer[len_seen:]
if not new_content or chr(0xfffd) in new_content: # partial unicode character, don't send it yet.
continue
seen_content = answer
chunk = text_streaming_chunk(new_content)
yield chunk
completion_token_count = len(encode(answer)[0])
stop_reason = "stop"
if token_count + completion_token_count >= generate_params['truncation_length'] or completion_token_count >= max_tokens:
stop_reason = "length"
chunk = text_streaming_chunk(suffix)
chunk[resp_list][0]["finish_reason"] = stop_reason
chunk["usage"] = {
"prompt_tokens": token_count,
"completion_tokens": completion_token_count,
"total_tokens": token_count + completion_token_count
}
yield chunk
def chat_completions(body: dict, is_legacy: bool = False) -> dict:
generator = chat_completions_common(body, is_legacy, stream=False)
return deque(generator, maxlen=1).pop()
def stream_chat_completions(body: dict, is_legacy: bool = False):
for resp in chat_completions_common(body, is_legacy, stream=True):
yield resp
def completions(body: dict, is_legacy: bool = False) -> dict:
generator = completions_common(body, is_legacy, stream=False)
return deque(generator, maxlen=1).pop()
def stream_completions(body: dict, is_legacy: bool = False):
for resp in completions_common(body, is_legacy, stream=True):
yield resp

@ -0,0 +1,92 @@
import os
import numpy as np
from extensions.openai.errors import ServiceUnavailableError
from extensions.openai.utils import debug_msg, float_list_to_base64
from modules.logging_colors import logger
embeddings_params_initialized = False
def initialize_embedding_params():
'''
uses 'lazy loading' to avoid a circular import,
so the initialization body runs only once
'''
global embeddings_params_initialized
if not embeddings_params_initialized:
from extensions.openai.script import params
global st_model, embeddings_model, embeddings_device
st_model = os.environ.get("OPENEDAI_EMBEDDING_MODEL", params.get('embedding_model', 'all-mpnet-base-v2'))
embeddings_model = None
# OPENEDAI_EMBEDDING_DEVICE: auto (best or cpu), cpu, cuda, ipu, xpu, mkldnn, opengl, opencl, ideep, hip, ve, fpga, ort, xla, lazy, vulkan, mps, meta, hpu, mtia, privateuseone
embeddings_device = os.environ.get("OPENEDAI_EMBEDDING_DEVICE", params.get('embedding_device', 'cpu'))
if embeddings_device.lower() == 'auto':
embeddings_device = None
embeddings_params_initialized = True
def load_embedding_model(model: str):
try:
from sentence_transformers import SentenceTransformer
except ModuleNotFoundError:
logger.error("The sentence_transformers module has not been found. Please install it manually with pip install -U sentence-transformers.")
raise ModuleNotFoundError
initialize_embedding_params()
global embeddings_device, embeddings_model
try:
print(f"Try embedding model: {model} on {embeddings_device}")
embeddings_model = SentenceTransformer(model, device=embeddings_device)
print(f"Loaded embedding model: {model}")
except Exception as e:
embeddings_model = None
raise ServiceUnavailableError(f"Error: Failed to load embedding model: {model}", internal_message=repr(e))
def get_embeddings_model():
initialize_embedding_params()
global embeddings_model, st_model
if st_model and not embeddings_model:
load_embedding_model(st_model) # lazy load the model
return embeddings_model
def get_embeddings_model_name() -> str:
initialize_embedding_params()
global st_model
return st_model
def get_embeddings(input: list) -> np.ndarray:
model = get_embeddings_model()
debug_msg(f"embedding model : {model}")
embedding = model.encode(input, convert_to_numpy=True, normalize_embeddings=True, convert_to_tensor=False)
debug_msg(f"embedding result : {embedding}") # might be too long even for debug, use at you own will
return embedding
def embeddings(input: list, encoding_format: str) -> dict:
embeddings = get_embeddings(input)
if encoding_format == "base64":
data = [{"object": "embedding", "embedding": float_list_to_base64(emb), "index": n} for n, emb in enumerate(embeddings)]
else:
data = [{"object": "embedding", "embedding": emb.tolist(), "index": n} for n, emb in enumerate(embeddings)]
response = {
"object": "list",
"data": data,
"model": st_model, # return the real model
"usage": {
"prompt_tokens": 0,
"total_tokens": 0,
}
}
debug_msg(f"Embeddings return size: {len(embeddings[0])}, number: {len(embeddings)}")
return response

@ -0,0 +1,31 @@
class OpenAIError(Exception):
def __init__(self, message=None, code=500, internal_message=''):
self.message = message
self.code = code
self.internal_message = internal_message
def __repr__(self):
return "%s(message=%r, code=%d)" % (
self.__class__.__name__,
self.message,
self.code,
)
class InvalidRequestError(OpenAIError):
def __init__(self, message, param, code=400, internal_message=''):
super().__init__(message, code, internal_message)
self.param = param
def __repr__(self):
return "%s(message=%r, code=%d, param=%s)" % (
self.__class__.__name__,
self.message,
self.code,
self.param,
)
class ServiceUnavailableError(OpenAIError):
def __init__(self, message="Service unavailable, please try again later.", code=503, internal_message=''):
super().__init__(message, code, internal_message)

@ -0,0 +1,70 @@
import os
import time
import requests
from extensions.openai.errors import ServiceUnavailableError
def generations(prompt: str, size: str, response_format: str, n: int):
# Stable Diffusion callout wrapper for txt2img
# Low-effort implementation for compatibility. With only "prompt" being passed, and DALL-E assumed,
# the results will be limited and likely poor. SD has hundreds of models and dozens of settings.
# If you want high-quality, tailored results you should just use the Stable Diffusion API directly.
# It's too general an API to try and shape the result with specific tags like negative prompts
# or "masterpiece", etc. SD configuration is beyond the scope of this API.
# At this point I will not add the edits and variations endpoints (i.e. img2img) because they
# require changing the form data handling to accept multipart form data; properly supporting
# url return types would also require file management and a web server for files... Perhaps later!
base_model_size = 512 if 'SD_BASE_MODEL_SIZE' not in os.environ else int(os.environ.get('SD_BASE_MODEL_SIZE', 512))
sd_defaults = {
'sampler_name': 'DPM++ 2M Karras', # vast improvement
'steps': 30,
}
width, height = [int(x) for x in size.split('x')] # ignore the restrictions on size
# to hack on better generation, edit default payload.
payload = {
'prompt': prompt, # ignore prompt limit of 1000 characters
'width': width,
'height': height,
'batch_size': n,
}
payload.update(sd_defaults)
scale = min(width, height) / base_model_size
if scale >= 1.2:
# for better performance with the default size (1024), and larger res.
scaler = {
'width': width // scale,
'height': height // scale,
'hr_scale': scale,
'enable_hr': True,
'hr_upscaler': 'Latent',
'denoising_strength': 0.68,
}
payload.update(scaler)
resp = {
'created': int(time.time()),
'data': []
}
from extensions.openai.script import params
# TODO: support SD_WEBUI_AUTH username:password pair.
sd_url = f"{os.environ.get('SD_WEBUI_URL', params.get('sd_webui_url', ''))}/sdapi/v1/txt2img"
response = requests.post(url=sd_url, json=payload)
r = response.json()
if response.status_code != 200 or 'images' not in r:
print(r)
raise ServiceUnavailableError(r.get('error', 'Unknown error calling Stable Diffusion'), code=response.status_code, internal_message=r.get('errors', None))
# r['parameters']...
for b64_json in r['images']:
if response_format == 'b64_json':
resp['data'].extend([{'b64_json': b64_json}])
else:
resp['data'].extend([{'url': f'data:image/png;base64,{b64_json}'}]) # yeah it's lazy. requests.get() will not work with this
return resp
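A quick usage sketch (the URL and prompt are assumptions; requires an Automatic1111 instance running with `--api`):

```python
# Hypothetical usage: point SD_WEBUI_URL at a running SD WebUI first.
import os
os.environ['SD_WEBUI_URL'] = 'http://127.0.0.1:7861'

resp = generations(
    prompt='a watercolor lighthouse at dusk',
    size='512x512',
    response_format='b64_json',  # or 'url' for a data: URI
    n=1,
)
print(resp['created'], len(resp['data']), 'image(s)')
```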

@ -0,0 +1,69 @@
from modules import shared
from modules.logging_colors import logger
from modules.models import load_model, unload_model
from modules.models_settings import get_model_metadata, update_model_parameters
from modules.utils import get_available_models
def get_current_model_info():
return {
'model_name': shared.model_name,
'lora_names': shared.lora_names
}
def list_models():
result = {
"object": "list",
"data": []
}
for model in get_dummy_models() + get_available_models()[1:]:
result["data"].append(model_info_dict(model))
return result
def model_info_dict(model_name: str) -> dict:
return {
"id": model_name,
"object": "model",
"created": 0,
"owned_by": "user"
}
def get_dummy_models() -> list:
    return [  # many clients expect these OpenAI model names, so include a few as dummies
'gpt-3.5-turbo',
'text-embedding-ada-002',
]
def _load_model(data):
model_name = data["model_name"]
args = data["args"]
settings = data["settings"]
unload_model()
model_settings = get_model_metadata(model_name)
update_model_parameters(model_settings)
# Update shared.args with custom model loading settings
if args:
for k in args:
if hasattr(shared.args, k):
setattr(shared.args, k, args[k])
shared.model, shared.tokenizer = load_model(model_name)
shared.model_name = model_name
# Update shared.settings with custom generation defaults
if settings:
for k in settings:
if k in shared.settings:
shared.settings[k] = settings[k]
if k == 'truncation_length':
logger.info(f"TRUNCATION LENGTH (UPDATED): {shared.settings['truncation_length']}")
elif k == 'instruction_template':
logger.info(f"INSTRUCTION TEMPLATE (UPDATED): {shared.settings['instruction_template']}")
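For reference, a sketch of the `data` dict that `_load_model` expects (the model name is hypothetical); it mirrors the LoadModelRequest schema used by `/v1/internal/model/load`:

```python
# Hypothetical payload sketch for _load_model().
payload = {
    'model_name': 'my-llama-13b',                    # a folder under models/
    'args': {'load_in_4bit': True},                  # overrides for shared.args
    'settings': {'instruction_template': 'Alpaca'},  # overrides for shared.settings
}
# _load_model(payload)  # unloads the current model, then loads this one
```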

@ -0,0 +1,69 @@
import time
import numpy as np
from numpy.linalg import norm
from extensions.openai.embeddings import get_embeddings
moderations_disabled = False # return 0/false
category_embeddings = None
antonym_embeddings = None
categories = ["sexual", "hate", "harassment", "self-harm", "sexual/minors", "hate/threatening", "violence/graphic", "self-harm/intent", "self-harm/instructions", "harassment/threatening", "violence"]
flag_threshold = 0.5
def get_category_embeddings() -> dict:
global category_embeddings, categories
if category_embeddings is None:
embeddings = get_embeddings(categories).tolist()
category_embeddings = dict(zip(categories, embeddings))
return category_embeddings
def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
return np.dot(a, b) / (norm(a) * norm(b))
# seems closest to OpenAI's scores when using all-mpnet-base-v2
def mod_score(a: np.ndarray, b: np.ndarray) -> float:
return 2.0 * np.dot(a, b)
def moderations(input):
global category_embeddings, categories, flag_threshold, moderations_disabled
results = {
"id": f"modr-{int(time.time()*1e9)}",
"model": "text-moderation-001",
"results": [],
}
if moderations_disabled:
results['results'] = [{
'categories': dict([(C, False) for C in categories]),
'category_scores': dict([(C, 0.0) for C in categories]),
'flagged': False,
}]
return results
category_embeddings = get_category_embeddings()
# input, string or array
if isinstance(input, str):
input = [input]
for in_str in input:
for ine in get_embeddings([in_str]):
category_scores = dict([(C, mod_score(category_embeddings[C], ine)) for C in categories])
category_flags = dict([(C, bool(category_scores[C] > flag_threshold)) for C in categories])
flagged = any(category_flags.values())
results['results'].extend([{
'flagged': flagged,
'categories': category_flags,
'category_scores': category_scores,
}])
print(results)
return results
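A small usage sketch (the input string is an assumption; requires the embedding model to be available):

```python
# Hypothetical usage: score a string against all moderation categories.
result = moderations('some potentially harmful text')
for r in result['results']:
    flagged = [c for c, v in r['categories'].items() if v]
    print('flagged:', r['flagged'], flagged)
```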

@ -0,0 +1,4 @@
SpeechRecognition==3.10.0
flask_cloudflared==0.0.14
sse-starlette==1.6.5
tiktoken

@ -0,0 +1,317 @@
import asyncio
import io
import json
import os
import traceback
from threading import Thread
import speech_recognition as sr
import uvicorn
from fastapi import Depends, FastAPI, Header, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.requests import Request
from fastapi.responses import JSONResponse
from pydub import AudioSegment
from sse_starlette import EventSourceResponse
import extensions.openai.completions as OAIcompletions
import extensions.openai.embeddings as OAIembeddings
import extensions.openai.images as OAIimages
import extensions.openai.models as OAImodels
import extensions.openai.moderations as OAImoderations
from extensions.openai.errors import ServiceUnavailableError
from extensions.openai.tokens import token_count, token_decode, token_encode
from extensions.openai.utils import _start_cloudflared
from modules import shared
from modules.logging_colors import logger
from modules.models import unload_model
from modules.text_generation import stop_everything_event
from .typing import (
ChatCompletionRequest,
ChatCompletionResponse,
CompletionRequest,
CompletionResponse,
DecodeRequest,
DecodeResponse,
EmbeddingsRequest,
EmbeddingsResponse,
EncodeRequest,
EncodeResponse,
LoadModelRequest,
ModelInfoResponse,
TokenCountResponse,
to_dict
)
params = {
'embedding_device': 'cpu',
'embedding_model': 'sentence-transformers/all-mpnet-base-v2',
'sd_webui_url': '',
'debug': 0
}
streaming_semaphore = asyncio.Semaphore(1)
def verify_api_key(authorization: str = Header(None)) -> None:
expected_api_key = shared.args.api_key
if expected_api_key and (authorization is None or authorization != f"Bearer {expected_api_key}"):
raise HTTPException(status_code=401, detail="Unauthorized")
app = FastAPI(dependencies=[Depends(verify_api_key)])
# Configure CORS settings to allow all origins, methods, and headers
app.add_middleware(
CORSMiddleware,
allow_origins=["*"],
allow_credentials=True,
allow_methods=["*"],
allow_headers=["*"]
)
@app.options("/")
async def options_route():
return JSONResponse(content="OK")
@app.post('/v1/completions', response_model=CompletionResponse)
async def openai_completions(request: Request, request_data: CompletionRequest):
path = request.url.path
is_legacy = "/generate" in path
if request_data.stream:
async def generator():
async with streaming_semaphore:
response = OAIcompletions.stream_completions(to_dict(request_data), is_legacy=is_legacy)
for resp in response:
disconnected = await request.is_disconnected()
if disconnected:
break
yield {"data": json.dumps(resp)}
return EventSourceResponse(generator()) # SSE streaming
else:
response = OAIcompletions.completions(to_dict(request_data), is_legacy=is_legacy)
return JSONResponse(response)
@app.post('/v1/chat/completions', response_model=ChatCompletionResponse)
async def openai_chat_completions(request: Request, request_data: ChatCompletionRequest):
path = request.url.path
is_legacy = "/generate" in path
if request_data.stream:
async def generator():
async with streaming_semaphore:
response = OAIcompletions.stream_chat_completions(to_dict(request_data), is_legacy=is_legacy)
for resp in response:
disconnected = await request.is_disconnected()
if disconnected:
break
yield {"data": json.dumps(resp)}
return EventSourceResponse(generator()) # SSE streaming
else:
response = OAIcompletions.chat_completions(to_dict(request_data), is_legacy=is_legacy)
return JSONResponse(response)
@app.get("/v1/models")
@app.get("/v1/models/{model}")
async def handle_models(request: Request):
path = request.url.path
is_list = request.url.path.split('?')[0].split('#')[0] == '/v1/models'
if is_list:
response = OAImodels.list_models()
else:
model_name = path[len('/v1/models/'):]
response = OAImodels.model_info_dict(model_name)
return JSONResponse(response)
@app.get('/v1/billing/usage')
def handle_billing_usage():
'''
Ex. /v1/dashboard/billing/usage?start_date=2023-05-01&end_date=2023-05-31
'''
return JSONResponse(content={"total_usage": 0})
@app.post('/v1/audio/transcriptions')
async def handle_audio_transcription(request: Request):
r = sr.Recognizer()
form = await request.form()
    audio_file = await form["file"].read()
    audio_data = AudioSegment.from_file(io.BytesIO(audio_file))  # wrap the raw bytes in a file-like object
# Convert AudioSegment to raw data
raw_data = audio_data.raw_data
# Create AudioData object
audio_data = sr.AudioData(raw_data, audio_data.frame_rate, audio_data.sample_width)
    whisper_language = form.get('language', None)
    whisper_model = form.get('model', 'tiny')  # use the model from the form data if it exists, otherwise default to tiny
    transcription = {"text": ""}
    try:
        transcription["text"] = r.recognize_whisper(audio_data, language=whisper_language, model=whisper_model)
except sr.UnknownValueError:
print("Whisper could not understand audio")
transcription["text"] = "Whisper could not understand audio UnknownValueError"
except sr.RequestError as e:
print("Could not request results from Whisper", e)
transcription["text"] = "Whisper could not understand audio RequestError"
return JSONResponse(content=transcription)
@app.post('/v1/images/generations')
async def handle_image_generation(request: Request):
if not os.environ.get('SD_WEBUI_URL', params.get('sd_webui_url', '')):
raise ServiceUnavailableError("Stable Diffusion not available. SD_WEBUI_URL not set.")
body = await request.json()
prompt = body['prompt']
size = body.get('size', '1024x1024')
response_format = body.get('response_format', 'url') # or b64_json
n = body.get('n', 1) # ignore the batch limits of max 10
response = await OAIimages.generations(prompt=prompt, size=size, response_format=response_format, n=n)
return JSONResponse(response)
@app.post("/v1/embeddings", response_model=EmbeddingsResponse)
async def handle_embeddings(request: Request, request_data: EmbeddingsRequest):
input = request_data.input
if not input:
raise HTTPException(status_code=400, detail="Missing required argument input")
    if isinstance(input, str):
input = [input]
response = OAIembeddings.embeddings(input, request_data.encoding_format)
return JSONResponse(response)
@app.post("/v1/moderations")
async def handle_moderations(request: Request):
body = await request.json()
input = body["input"]
if not input:
raise HTTPException(status_code=400, detail="Missing required argument input")
response = OAImoderations.moderations(input)
return JSONResponse(response)
@app.post("/v1/internal/encode", response_model=EncodeResponse)
async def handle_token_encode(request_data: EncodeRequest):
response = token_encode(request_data.text)
return JSONResponse(response)
@app.post("/v1/internal/decode", response_model=DecodeResponse)
async def handle_token_decode(request_data: DecodeRequest):
response = token_decode(request_data.tokens)
return JSONResponse(response)
@app.post("/v1/internal/token-count", response_model=TokenCountResponse)
async def handle_token_count(request_data: EncodeRequest):
response = token_count(request_data.text)
return JSONResponse(response)
@app.post("/v1/internal/stop-generation")
async def handle_stop_generation(request: Request):
stop_everything_event()
return JSONResponse(content="OK")
@app.get("/v1/internal/model/info", response_model=ModelInfoResponse)
async def handle_model_info():
payload = OAImodels.get_current_model_info()
return JSONResponse(content=payload)
@app.post("/v1/internal/model/load")
async def handle_load_model(request_data: LoadModelRequest):
'''
This endpoint is experimental and may change in the future.
The "args" parameter can be used to modify flags like "--load-in-4bit"
or "--n-gpu-layers" before loading a model. Example:
```
"args": {
"load_in_4bit": true,
"n_gpu_layers": 12
}
```
Note that those settings will remain after loading the model. So you
may need to change them back to load a second model.
The "settings" parameter is also a dict but with keys for the
shared.settings object. It can be used to modify the default instruction
template like this:
```
"settings": {
"instruction_template": "Alpaca"
}
```
'''
try:
OAImodels._load_model(to_dict(request_data))
return JSONResponse(content="OK")
    except Exception:
        traceback.print_exc()
        raise HTTPException(status_code=400, detail="Failed to load the model.")
@app.post("/v1/internal/model/unload")
async def handle_unload_model():
unload_model()
return JSONResponse(content="OK")
def run_server():
server_addr = '0.0.0.0' if shared.args.listen else '127.0.0.1'
port = int(os.environ.get('OPENEDAI_PORT', shared.args.api_port))
ssl_certfile = os.environ.get('OPENEDAI_CERT_PATH', shared.args.ssl_certfile)
ssl_keyfile = os.environ.get('OPENEDAI_KEY_PATH', shared.args.ssl_keyfile)
if shared.args.public_api:
def on_start(public_url: str):
logger.info(f'OpenAI-compatible API URL:\n\n{public_url}\n')
_start_cloudflared(port, shared.args.public_api_id, max_attempts=3, on_start=on_start)
else:
if ssl_keyfile and ssl_certfile:
logger.info(f'OpenAI-compatible API URL:\n\nhttps://{server_addr}:{port}\n')
else:
logger.info(f'OpenAI-compatible API URL:\n\nhttp://{server_addr}:{port}\n')
if shared.args.api_key:
logger.info(f'OpenAI API key:\n\n{shared.args.api_key}\n')
uvicorn.run(app, host=server_addr, port=port, ssl_certfile=ssl_certfile, ssl_keyfile=ssl_keyfile)
def setup():
Thread(target=run_server, daemon=True).start()
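Once `setup()` has started the server, any OpenAI-style client can talk to it. A minimal client sketch with `requests` (the port assumes the default `--api-port`, and the key is only needed when `--api-key` is set):

```python
# Hypothetical client sketch against a local instance.
import requests

url = 'http://127.0.0.1:5000/v1/chat/completions'
headers = {'Authorization': 'Bearer my-secret-key'}  # omit if no --api-key
body = {
    'mode': 'instruct',
    'messages': [{'role': 'user', 'content': 'Say hello in one sentence.'}],
    'max_tokens': 64,
}
r = requests.post(url, json=body, headers=headers, timeout=120)
print(r.json()['choices'][0]['message']['content'])
```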

@ -0,0 +1,26 @@
from modules.text_generation import decode, encode
def token_count(prompt):
tokens = encode(prompt)[0]
return {
'length': len(tokens)
}
def token_encode(input):
tokens = encode(input)[0]
if tokens.__class__.__name__ in ['Tensor', 'ndarray']:
tokens = tokens.tolist()
return {
'tokens': tokens,
'length': len(tokens),
}
def token_decode(tokens):
output = decode(tokens)
return {
'text': output
}
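These helpers back the `/v1/internal/encode`, `/v1/internal/decode` and `/v1/internal/token-count` endpoints; a direct usage sketch:

```python
# Hypothetical usage; requires a model/tokenizer to already be loaded.
enc = token_encode('Hello world')
print(enc['length'], enc['tokens'][:5])
print(token_decode(enc['tokens'])['text'])
print(token_count('Hello world')['length'])
```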

@ -0,0 +1,175 @@
import json
import time
from typing import List
from pydantic import BaseModel, Field
class GenerationOptions(BaseModel):
preset: str | None = Field(default=None, description="The name of a file under text-generation-webui/presets (without the .yaml extension). The sampling parameters that get overwritten by this option are the keys in the default_preset() function in modules/presets.py.")
min_p: float = 0
top_k: int = 0
repetition_penalty: float = 1
repetition_penalty_range: int = 0
typical_p: float = 1
tfs: float = 1
top_a: float = 0
epsilon_cutoff: float = 0
eta_cutoff: float = 0
guidance_scale: float = 1
negative_prompt: str = ''
penalty_alpha: float = 0
mirostat_mode: int = 0
mirostat_tau: float = 5
mirostat_eta: float = 0.1
temperature_last: bool = False
do_sample: bool = True
seed: int = -1
encoder_repetition_penalty: float = 1
no_repeat_ngram_size: int = 0
min_length: int = 0
num_beams: int = 1
length_penalty: float = 1
early_stopping: bool = False
truncation_length: int = 0
max_tokens_second: int = 0
custom_token_bans: str = ""
auto_max_new_tokens: bool = False
ban_eos_token: bool = False
add_bos_token: bool = True
skip_special_tokens: bool = True
grammar_string: str = ""
class CompletionRequestParams(BaseModel):
model: str | None = Field(default=None, description="Unused parameter. To change the model, use the /v1/internal/model/load endpoint.")
prompt: str | List[str]
best_of: int | None = Field(default=1, description="Unused parameter.")
echo: bool | None = False
frequency_penalty: float | None = 0
logit_bias: dict | None = None
logprobs: int | None = None
max_tokens: int | None = 16
n: int | None = Field(default=1, description="Unused parameter.")
presence_penalty: float | None = 0
stop: str | List[str] | None = None
stream: bool | None = False
suffix: str | None = None
temperature: float | None = 1
top_p: float | None = 1
user: str | None = Field(default=None, description="Unused parameter.")
class CompletionRequest(GenerationOptions, CompletionRequestParams):
pass
class CompletionResponse(BaseModel):
id: str
choices: List[dict]
    created: int = Field(default_factory=lambda: int(time.time()))  # evaluated per response, not at import time
model: str
object: str = "text_completion"
usage: dict
class ChatCompletionRequestParams(BaseModel):
messages: List[dict]
model: str | None = Field(default=None, description="Unused parameter. To change the model, use the /v1/internal/model/load endpoint.")
frequency_penalty: float | None = 0
function_call: str | dict | None = Field(default=None, description="Unused parameter.")
functions: List[dict] | None = Field(default=None, description="Unused parameter.")
logit_bias: dict | None = None
max_tokens: int | None = None
n: int | None = Field(default=1, description="Unused parameter.")
presence_penalty: float | None = 0
stop: str | List[str] | None = None
stream: bool | None = False
temperature: float | None = 1
top_p: float | None = 1
user: str | None = Field(default=None, description="Unused parameter.")
mode: str = Field(default='instruct', description="Valid options: instruct, chat, chat-instruct.")
    instruction_template: str | None = Field(default=None, description="An instruction template defined under text-generation-webui/instruction-templates. If not set, the correct template will be guessed using the regular expressions in models/config.yaml.")
turn_template: str | None = Field(default=None, description="Overwrites the value set by instruction_template.")
name1_instruct: str | None = Field(default=None, description="Overwrites the value set by instruction_template.")
name2_instruct: str | None = Field(default=None, description="Overwrites the value set by instruction_template.")
context_instruct: str | None = Field(default=None, description="Overwrites the value set by instruction_template.")
system_message: str | None = Field(default=None, description="Overwrites the value set by instruction_template.")
character: str | None = Field(default=None, description="A character defined under text-generation-webui/characters. If not set, the default \"Assistant\" character will be used.")
name1: str | None = Field(default=None, description="Your name (the user). By default, it's \"You\".")
name2: str | None = Field(default=None, description="Overwrites the value set by character.")
context: str | None = Field(default=None, description="Overwrites the value set by character.")
greeting: str | None = Field(default=None, description="Overwrites the value set by character.")
chat_instruct_command: str | None = None
continue_: bool = Field(default=False, description="Makes the last bot message in the history be continued instead of starting a new message.")
class ChatCompletionRequest(GenerationOptions, ChatCompletionRequestParams):
pass
class ChatCompletionResponse(BaseModel):
id: str
choices: List[dict]
    created: int = Field(default_factory=lambda: int(time.time()))  # evaluated per response, not at import time
model: str
object: str = "chat.completion"
usage: dict
class EncodeRequest(BaseModel):
text: str
class DecodeRequest(BaseModel):
tokens: List[int]
class EncodeResponse(BaseModel):
tokens: List[int]
length: int
class DecodeResponse(BaseModel):
text: str
class TokenCountResponse(BaseModel):
length: int
class ModelInfoResponse(BaseModel):
model_name: str
lora_names: List[str]
class LoadModelRequest(BaseModel):
model_name: str
args: dict | None = None
settings: dict | None = None
class EmbeddingsRequest(BaseModel):
input: str | List[str]
model: str | None = Field(default=None, description="Unused parameter. To change the model, set the OPENEDAI_EMBEDDING_MODEL and OPENEDAI_EMBEDDING_DEVICE environment variables before starting the server.")
encoding_format: str = Field(default="float", description="Can be float or base64.")
user: str | None = Field(default=None, description="Unused parameter.")
class EmbeddingsResponse(BaseModel):
index: int
embedding: List[float]
object: str = "embedding"
def to_json(obj):
return json.dumps(obj.__dict__, indent=4)
def to_dict(obj):
return obj.__dict__
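A quick sketch of how these models are used: pydantic validates the request and fills in defaults, and `to_dict()` flattens it for the completion backends (the prompt is an assumption):

```python
# Hypothetical usage sketch.
req = CompletionRequest(prompt='Once upon a time', max_tokens=32)
print(req.temperature, req.top_p)  # defaults: 1, 1
print(to_dict(req)['prompt'])      # 'Once upon a time'
```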

@ -0,0 +1,54 @@
import base64
import os
import time
import traceback
from typing import Callable, Optional
import numpy as np
def float_list_to_base64(float_array: np.ndarray) -> str:
    # Encode a float32 array in the base64 format that the OpenAI client expects
    # float_array = np.array(float_list, dtype="float32")
# Get raw bytes
bytes_array = float_array.tobytes()
# Encode bytes into base64
encoded_bytes = base64.b64encode(bytes_array)
# Turn raw base64 encoded bytes into ASCII
ascii_string = encoded_bytes.decode('ascii')
return ascii_string
def debug_msg(*args, **kwargs):
from extensions.openai.script import params
if os.environ.get("OPENEDAI_DEBUG", params.get('debug', 0)):
print(*args, **kwargs)
def _start_cloudflared(port: int, tunnel_id: str, max_attempts: int = 3, on_start: Optional[Callable[[str], None]] = None):
try:
from flask_cloudflared import _run_cloudflared
except ImportError:
print('You should install flask_cloudflared manually')
raise Exception(
'flask_cloudflared not installed. Make sure you installed the requirements.txt for this extension.')
for _ in range(max_attempts):
try:
if tunnel_id is not None:
public_url = _run_cloudflared(port, port + 1, tunnel_id=tunnel_id)
else:
public_url = _run_cloudflared(port, port + 1)
if on_start:
on_start(public_url)
return
except Exception:
traceback.print_exc()
time.sleep(3)
raise Exception('Could not start cloudflared.')
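For reference, the client-side inverse of `float_list_to_base64` is a sketch like this:

```python
# Sketch: decode a base64 embedding back into float32 values client-side.
import base64
import numpy as np

def base64_to_float_array(data: str) -> np.ndarray:
    return np.frombuffer(base64.b64decode(data), dtype=np.float32)

emb = np.array([0.1, -0.2, 0.3], dtype=np.float32)
assert np.allclose(base64_to_float_array(float_list_to_base64(emb)), emb)
```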

@ -0,0 +1,309 @@
import time
import gradio
import numpy as np
import torch
from transformers import LogitsProcessor
from modules import html_generator, shared
params = {
'active': True,
'color_by_perplexity': False,
'color_by_probability': False,
'ppl_scale': 15.0, # No slider for this right now, because I don't think it really needs to be changed. Very large perplexity scores don't show up often.
'probability_dropdown': False,
'verbose': False # For debugging mostly
}
class PerplexityLogits(LogitsProcessor):
def __init__(self, verbose=False):
self.generated_token_ids = []
self.selected_probs = []
self.top_token_ids_list = []
self.top_probs_list = []
self.perplexities_list = []
self.last_probs = None
self.verbose = verbose
def __call__(self, input_ids, scores):
# t0 = time.time()
probs = torch.softmax(scores, dim=-1, dtype=torch.float)
log_probs = torch.nan_to_num(torch.log(probs)) # Note: This is to convert log(0) nan to 0, but probs*log_probs makes this 0 not affect the perplexity.
entropy = -torch.sum(probs * log_probs)
entropy = entropy.cpu().numpy()
perplexity = round(float(np.exp(entropy)), 4)
self.perplexities_list.append(perplexity)
last_token_id = int(input_ids[0][-1].cpu().numpy().item())
# Store the generated tokens (not sure why this isn't accessible in the output endpoint!)
self.generated_token_ids.append(last_token_id)
# Get last probability, and add to the list if it wasn't there
if len(self.selected_probs) > 0:
# Is the selected token in the top tokens?
if self.verbose:
print('Probs: Token after', shared.tokenizer.decode(last_token_id))
print('Probs:', [shared.tokenizer.decode(token_id) for token_id in self.top_token_ids_list[-1][0]])
print('Probs:', [round(float(prob), 4) for prob in self.top_probs_list[-1][0]])
if last_token_id in self.top_token_ids_list[-1][0]:
idx = self.top_token_ids_list[-1][0].index(last_token_id)
self.selected_probs.append(self.top_probs_list[-1][0][idx])
else:
self.top_token_ids_list[-1][0].append(last_token_id)
last_prob = round(float(self.last_probs[last_token_id]), 4)
self.top_probs_list[-1][0].append(last_prob)
self.selected_probs.append(last_prob)
else:
self.selected_probs.append(1.0) # Placeholder for the last token of the prompt
if self.verbose:
pplbar = "-"
if not np.isnan(perplexity):
pplbar = "*" * round(perplexity)
print(f"PPL: Token after {shared.tokenizer.decode(last_token_id)}\t{perplexity:.2f}\t{pplbar}")
# Get top 5 probabilities
top_tokens_and_probs = torch.topk(probs, 5)
top_probs = top_tokens_and_probs.values.cpu().numpy().astype(float).tolist()
top_token_ids = top_tokens_and_probs.indices.cpu().numpy().astype(int).tolist()
self.top_token_ids_list.append(top_token_ids)
self.top_probs_list.append(top_probs)
probs = probs.cpu().numpy().flatten()
self.last_probs = probs # Need to keep this as a reference for top probs
# t1 = time.time()
# print(f"PPL Processor: {(t1-t0):.3f} s")
# About 1 ms, though occasionally up to around 100 ms, not sure why...
# Doesn't actually modify the logits!
return scores
# Stores the perplexity and top probabilities
ppl_logits_processor = None
def logits_processor_modifier(logits_processor_list, input_ids):
global ppl_logits_processor
if params['active']:
ppl_logits_processor = PerplexityLogits(verbose=params['verbose'])
logits_processor_list.append(ppl_logits_processor)
def output_modifier(text):
global ppl_logits_processor
# t0 = time.time()
if not params['active']:
return text
# TODO: It's probably more efficient to do this above rather than modifying all these lists
# Remove last element of perplexities_list, top_token_ids_list, top_tokens_list, top_probs_list since everything is off by one because this extension runs before generation
perplexities = ppl_logits_processor.perplexities_list[:-1]
top_token_ids_list = ppl_logits_processor.top_token_ids_list[:-1]
top_tokens_list = [[shared.tokenizer.decode(token_id) for token_id in top_token_ids[0]] for top_token_ids in top_token_ids_list]
top_probs_list = ppl_logits_processor.top_probs_list[:-1]
# Remove first element of generated_token_ids, generated_tokens, selected_probs because they are for the last token of the prompt
gen_token_ids = ppl_logits_processor.generated_token_ids[1:]
gen_tokens = [shared.tokenizer.decode(token_id) for token_id in gen_token_ids]
sel_probs = ppl_logits_processor.selected_probs[1:]
end_part = '</div></div>' if params['probability_dropdown'] else '</span>' # Helps with finding the index after replacing part of the text.
i = 0
for token, prob, ppl, top_tokens, top_probs in zip(gen_tokens, sel_probs, perplexities, top_tokens_list, top_probs_list):
color = 'ffffff'
if params['color_by_probability'] and params['color_by_perplexity']:
color = probability_perplexity_color_scale(prob, ppl)
elif params['color_by_perplexity']:
color = perplexity_color_scale(ppl)
elif params['color_by_probability']:
color = probability_color_scale(prob)
if token in text[i:]:
if params['probability_dropdown']:
text = text[:i] + text[i:].replace(token, add_dropdown_html(token, color, top_tokens, top_probs[0], ppl), 1)
else:
text = text[:i] + text[i:].replace(token, add_color_html(token, color), 1)
i += text[i:].find(end_part) + len(end_part)
    # Average over the perplexity list (dropping the last, off-by-one entry as above).
print('Average perplexity:', round(np.mean(ppl_logits_processor.perplexities_list[:-1]), 4))
# t1 = time.time()
# print(f"Modifier: {(t1-t0):.3f} s")
# About 50 ms
return text
def probability_color_scale(prob):
'''
Green-yellow-red color scale
'''
rv = 0
gv = 0
if prob <= 0.5:
rv = 'ff'
gv = hex(int(255 * prob * 2))[2:]
if len(gv) < 2:
gv = '0' * (2 - len(gv)) + gv
else:
rv = hex(int(255 - 255 * (prob - 0.5) * 2))[2:]
gv = 'ff'
if len(rv) < 2:
rv = '0' * (2 - len(rv)) + rv
return rv + gv + '00'
def perplexity_color_scale(ppl):
'''
Red component only, white for 0 perplexity (sorry if you're not in dark mode)
'''
value = hex(max(int(255.0 - params['ppl_scale'] * (float(ppl) - 1.0)), 0))[2:]
if len(value) < 2:
value = '0' * (2 - len(value)) + value
return 'ff' + value + value
def probability_perplexity_color_scale(prob, ppl):
'''
Green-yellow-red for probability and blue component for perplexity
'''
rv = 0
gv = 0
bv = hex(min(max(int(params['ppl_scale'] * (float(ppl) - 1.0)), 0), 255))[2:]
if len(bv) < 2:
bv = '0' * (2 - len(bv)) + bv
if prob <= 0.5:
rv = 'ff'
gv = hex(int(255 * prob * 2))[2:]
if len(gv) < 2:
gv = '0' * (2 - len(gv)) + gv
else:
rv = hex(int(255 - 255 * (prob - 0.5) * 2))[2:]
gv = 'ff'
if len(rv) < 2:
rv = '0' * (2 - len(rv)) + rv
return rv + gv + bv
def add_color_html(token, color):
return f'<span style="color: #{color}">{token}</span>'
# TODO: Major issue: Applying this to too many tokens will cause a permanent slowdown in generation speed until the messages are removed from the history.
# I think the issue is from HTML elements taking up space in the visible history, and things like history deepcopy add latency proportional to the size of the history.
# Potential solution is maybe to modify the main generation code to send just the internal text and not the visible history, to avoid moving too much around.
# I wonder if we can also avoid using deepcopy here.
def add_dropdown_html(token, color, top_tokens, top_probs, perplexity=0):
html = f'<div class="hoverable"><span style="color: #{color}">{token}</span><div class="dropdown"><table class="dropdown-content"><tbody>'
for token_option, prob in zip(top_tokens, top_probs):
# TODO: Bold for selected token?
# Using divs prevented the problem of divs inside spans causing issues.
# Now the problem is that divs show the same whitespace of one space between every token.
# There is probably some way to fix this in CSS that I don't know about.
row_color = probability_color_scale(prob)
row_class = ' class="selected"' if token_option == token else ''
html += f'<tr{row_class}><td style="color: #{row_color}">{token_option}</td><td style="color: #{row_color}">{prob:.4f}</td></tr>'
if perplexity != 0:
ppl_color = perplexity_color_scale(perplexity)
html += f'<tr><td>Perplexity:</td><td style="color: #{ppl_color}">{perplexity:.4f}</td></tr>'
html += '</tbody></table></div></div>'
return html # About 750 characters per token...
def custom_css():
return """
.dropdown {
display: none;
position: absolute;
z-index: 50;
background-color: var(--block-background-fill);
box-shadow: 0px 8px 16px 0px rgba(0,0,0,0.2);
width: max-content;
overflow: visible;
padding: 5px;
border-radius: 10px;
border: 1px solid var(--border-color-primary);
}
.dropdown-content {
border: none;
z-index: 50;
}
.dropdown-content tr.selected {
background-color: var(--block-label-background-fill);
}
.dropdown-content td {
color: var(--body-text-color);
}
.hoverable {
color: var(--body-text-color);
position: relative;
display: inline-block;
overflow: visible;
font-size: 15px;
line-height: 1.75;
margin: 0;
padding: 0;
}
.hoverable:hover .dropdown {
display: block;
}
pre {
white-space: pre-wrap;
}
/* TODO: This makes the hover menus extend outside the bounds of the chat area, which is good.
   However, it also makes the scrollbar disappear, which is bad.
   The scroll bar needs to still be present. So for now, we can't see dropdowns that extend past the edge of the chat area.
.chat {
  overflow-y: auto;
}
*/
"""
# Monkeypatch applied to html_generator.py
# We simply don't render markdown into HTML. We wrap everything in <pre> tags to preserve whitespace
# formatting. If you're coloring tokens by perplexity or probability, or especially if you're using
# the probability dropdown, you probably care more about seeing the tokens the model actually outputted
# rather than rendering ```code blocks``` or *italics*.
def convert_to_markdown(string):
return '<pre>' + string + '</pre>'
html_generator.convert_to_markdown = convert_to_markdown
def ui():
def update_active_check(x):
params.update({'active': x})
def update_color_by_ppl_check(x):
params.update({'color_by_perplexity': x})
def update_color_by_prob_check(x):
params.update({'color_by_probability': x})
def update_prob_dropdown_check(x):
params.update({'probability_dropdown': x})
active_check = gradio.Checkbox(value=True, label="Compute probabilities and perplexity scores", info="Activate this extension. Note that this extension currently does not work with exllama or llama.cpp.")
color_by_ppl_check = gradio.Checkbox(value=False, label="Color by perplexity", info="Higher perplexity is more red. If also showing probability, higher perplexity has more blue component.")
color_by_prob_check = gradio.Checkbox(value=False, label="Color by probability", info="Green-yellow-red linear scale, with 100% green, 50% yellow, 0% red.")
prob_dropdown_check = gradio.Checkbox(value=False, label="Probability dropdown", info="Hover over a token to show a dropdown of top token probabilities. Currently slightly buggy with whitespace between tokens.")
active_check.change(update_active_check, active_check, None)
color_by_ppl_check.change(update_color_by_ppl_check, color_by_ppl_check, None)
color_by_prob_check.change(update_color_by_prob_check, color_by_prob_check, None)
prob_dropdown_check.change(update_prob_dropdown_check, prob_dropdown_check, None)
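For reference, the per-step perplexity computed in `__call__` is just the exponential of the entropy of the next-token distribution; a self-contained sketch:

```python
# Standalone sketch of the per-token perplexity computation:
# perplexity = exp(-sum(p * log p)) over the next-token distribution.
import torch

scores = torch.randn(1, 32000)                   # fake logits for one step
probs = torch.softmax(scores, dim=-1, dtype=torch.float)
log_probs = torch.nan_to_num(torch.log(probs))   # log(0) -> 0; p*log(p) stays 0
entropy = -torch.sum(probs * log_probs)
perplexity = float(torch.exp(entropy))
print(round(perplexity, 4))
```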

@ -0,0 +1,90 @@
## Description:
TL;DR: Lets the bot answer you with a picture!
Stable Diffusion API pictures for TextGen, v.1.2.0
An extension to [oobabooga's textgen-webui](https://github.com/oobabooga/text-generation-webui) allowing you to receive pics generated by [Automatic1111's SD-WebUI API](https://github.com/AUTOMATIC1111/stable-diffusion-webui)
<details>
<summary>Interface overview</summary>
![Interface](https://raw.githubusercontent.com/Brawlence/SD_api_pics/main/illust/Interface.jpg)
</details>
Load it in the `--chat` mode with `--extensions sd_api_pictures` alongside `send_pictures`
(it's not really required, but completes the picture, *pun intended*).
## History
Consider the version included with [oobabooga's repository](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/sd_api_pictures) to be STABLE; experimental developments and untested features are pushed to [Brawlence/SD_api_pics](https://github.com/Brawlence/SD_api_pics)
Latest change:
1.1.0 → 1.1.1: fixed received images missing Auto1111's metadata
## Details
The image generation is triggered:
- manually through the 'Force the picture response' button while in `Manual` or `Immersive/Interactive` modes OR
- automatically in `Immersive/Interactive` mode if the words `'send|mail|message|me'` are followed by `'image|pic|picture|photo|snap|snapshot|selfie|meme'` in the user's prompt
- always on in `Picturebook/Adventure` mode (if not currently suppressed by 'Suppress the picture response')
## Prerequisites
You need an available instance of Automatic1111's webui running with the `--api` flag. It hasn't been tested with a notebook / cloud-hosted instance, but that should be possible.
To run both webUIs locally in parallel on the same machine, specify a custom `--listen-port` for either Auto1111's or ooba's webUI.
## Features overview
- Connection to API check (press enter in the address box)
- [VRAM management (model shuffling)](https://github.com/Brawlence/SD_api_pics/wiki/VRAM-management-feature)
- [Three different operation modes](https://github.com/Brawlence/SD_api_pics/wiki/Modes-of-operation) (manual, interactive, always-on)
- User-defined persistent settings via settings.json
### Connection check
Insert the Automatic1111's WebUI address and press Enter:
![API-check](https://raw.githubusercontent.com/Brawlence/SD_api_pics/main/illust/API-check.gif)
Green mark confirms the ability to communicate with Auto1111's API on this address. Red cross means something's not right (the ext won't work).
### Persistent settings
Create or modify the `settings.json` in the `text-generation-webui` root directory to override the defaults
present in script.py, e.g.:
```json
{
"sd_api_pictures-manage_VRAM": 1,
"sd_api_pictures-save_img": 1,
"sd_api_pictures-prompt_prefix": "(Masterpiece:1.1), detailed, intricate, colorful, (solo:1.1)",
"sd_api_pictures-sampler_name": "DPM++ 2M Karras"
}
```
will automatically set the `Manage VRAM` & `Keep original images` checkboxes and change the texts in `Prompt Prefix` and `Sampler name` on load.
---
## Demonstrations:
These examples are from version 1.0.0, but the core functionality is still the same
<details>
<summary>Conversation 1</summary>
![EXA1](https://user-images.githubusercontent.com/42910943/224866564-939a3bcb-e7cf-4ac0-a33f-b3047b55054d.jpg)
![EXA2](https://user-images.githubusercontent.com/42910943/224866566-38394054-1320-45cf-9515-afa76d9d7745.jpg)
![EXA3](https://user-images.githubusercontent.com/42910943/224866568-10ea47b7-0bac-4269-9ec9-22c387a13b59.jpg)
![EXA4](https://user-images.githubusercontent.com/42910943/224866569-326121ad-1ea1-4874-9f6b-4bca7930a263.jpg)
</details>
<details>
<summary>Conversation 2</summary>
![Hist1](https://user-images.githubusercontent.com/42910943/224865517-c6966b58-bc4d-4353-aab9-6eb97778d7bf.jpg)
![Hist2](https://user-images.githubusercontent.com/42910943/224865527-b2fe7c2e-0da5-4c2e-b705-42e233b07084.jpg)
![Hist3](https://user-images.githubusercontent.com/42910943/224865535-a38d94e7-8975-4a46-a655-1ae1de41f85d.jpg)
</details>

@ -0,0 +1,386 @@
import base64
import io
import re
import time
from datetime import date
from pathlib import Path
import gradio as gr
import requests
import torch
from PIL import Image
from modules import shared
from modules.models import reload_model, unload_model
from modules.ui import create_refresh_button
torch._C._jit_set_profiling_mode(False)
# parameters which can be customized in settings.json of webui
params = {
'address': 'http://127.0.0.1:7860',
'mode': 0, # modes of operation: 0 (Manual only), 1 (Immersive/Interactive - looks for words to trigger), 2 (Picturebook Adventure - Always on)
'manage_VRAM': False,
'save_img': False,
'SD_model': 'NeverEndingDream', # not used right now
'prompt_prefix': '(Masterpiece:1.1), detailed, intricate, colorful',
'negative_prompt': '(worst quality, low quality:1.3)',
'width': 512,
'height': 512,
'denoising_strength': 0.61,
'restore_faces': False,
'enable_hr': False,
'hr_upscaler': 'ESRGAN_4x',
'hr_scale': '1.0',
'seed': -1,
'sampler_name': 'DPM++ 2M Karras',
'steps': 32,
'cfg_scale': 7,
'textgen_prefix': 'Please provide a detailed and vivid description of [subject]',
'sd_checkpoint': ' ',
'checkpoint_list': [" "]
}
def give_VRAM_priority(actor):
global shared, params
if actor == 'SD':
unload_model()
print("Requesting Auto1111 to re-load last checkpoint used...")
response = requests.post(url=f'{params["address"]}/sdapi/v1/reload-checkpoint', json='')
response.raise_for_status()
elif actor == 'LLM':
print("Requesting Auto1111 to vacate VRAM...")
response = requests.post(url=f'{params["address"]}/sdapi/v1/unload-checkpoint', json='')
response.raise_for_status()
reload_model()
elif actor == 'set':
        print("VRAM management activated -- requesting Auto1111 to vacate VRAM...")
response = requests.post(url=f'{params["address"]}/sdapi/v1/unload-checkpoint', json='')
response.raise_for_status()
elif actor == 'reset':
        print("VRAM management deactivated -- requesting Auto1111 to reload checkpoint")
response = requests.post(url=f'{params["address"]}/sdapi/v1/reload-checkpoint', json='')
response.raise_for_status()
else:
raise RuntimeError(f'Managing VRAM: "{actor}" is not a known state!')
response.raise_for_status()
del response
if params['manage_VRAM']:
give_VRAM_priority('set')
SD_models = ['NeverEndingDream']  # TODO: get with http://{address}/sdapi/v1/sd-models and allow user to select
picture_response = False # specifies if the next model response should appear as a picture
def remove_surrounded_chars(string):
# this expression matches to 'as few symbols as possible (0 upwards) between any asterisks' OR
# 'as few symbols as possible (0 upwards) between an asterisk and the end of the string'
    return re.sub(r'\*[^\*]*?(\*|$)', '', string)
def triggers_are_in(string):
string = remove_surrounded_chars(string)
# regex searches for send|main|message|me (at the end of the word) followed by
# a whole word of image|pic|picture|photo|snap|snapshot|selfie|meme(s),
# (?aims) are regex parser flags
return bool(re.search('(?aims)(send|mail|message|me)\\b.+?\\b(image|pic(ture)?|photo|snap(shot)?|selfie|meme)s?\\b', string))
def state_modifier(state):
if picture_response:
state['stream'] = False
return state
def input_modifier(string):
"""
This function is applied to your text inputs before
they are fed into the model.
"""
global params
if not params['mode'] == 1: # if not in immersive/interactive mode, do nothing
return string
if triggers_are_in(string): # if we're in it, check for trigger words
toggle_generation(True)
string = string.lower()
if "of" in string:
subject = string.split('of', 1)[1] # subdivide the string once by the first 'of' instance and get what's coming after it
string = params['textgen_prefix'].replace("[subject]", subject)
else:
string = params['textgen_prefix'].replace("[subject]", "your appearance, your surroundings and what you are doing right now")
return string
# Get and save the Stable Diffusion-generated picture
def get_SD_pictures(description, character):
global params
if params['manage_VRAM']:
give_VRAM_priority('SD')
description = re.sub('<audio.*?</audio>', ' ', description)
description = f"({description}:1)"
payload = {
"prompt": params['prompt_prefix'] + description,
"seed": params['seed'],
"sampler_name": params['sampler_name'],
"enable_hr": params['enable_hr'],
"hr_scale": params['hr_scale'],
"hr_upscaler": params['hr_upscaler'],
"denoising_strength": params['denoising_strength'],
"steps": params['steps'],
"cfg_scale": params['cfg_scale'],
"width": params['width'],
"height": params['height'],
"restore_faces": params['restore_faces'],
"override_settings_restore_afterwards": True,
"negative_prompt": params['negative_prompt']
}
print(f'Prompting the image generator via the API on {params["address"]}...')
response = requests.post(url=f'{params["address"]}/sdapi/v1/txt2img', json=payload)
response.raise_for_status()
r = response.json()
visible_result = ""
for img_str in r['images']:
if params['save_img']:
img_data = base64.b64decode(img_str)
variadic = f'{date.today().strftime("%Y_%m_%d")}/{character}_{int(time.time())}'
output_file = Path(f'extensions/sd_api_pictures/outputs/{variadic}.png')
output_file.parent.mkdir(parents=True, exist_ok=True)
with open(output_file.as_posix(), 'wb') as f:
f.write(img_data)
visible_result = visible_result + f'<img src="/file/extensions/sd_api_pictures/outputs/{variadic}.png" alt="{description}" style="max-width: unset; max-height: unset;">\n'
else:
image = Image.open(io.BytesIO(base64.b64decode(img_str.split(",", 1)[0])))
# lower the resolution of received images for the chat, otherwise the log size gets out of control quickly with all the base64 values in visible history
image.thumbnail((300, 300))
buffered = io.BytesIO()
image.save(buffered, format="JPEG")
buffered.seek(0)
image_bytes = buffered.getvalue()
img_str = "data:image/jpeg;base64," + base64.b64encode(image_bytes).decode()
visible_result = visible_result + f'<img src="{img_str}" alt="{description}">\n'
if params['manage_VRAM']:
give_VRAM_priority('LLM')
return visible_result
# TODO: how do I make the UI history ignore the resulting pictures (I don't want HTML to appear in history)
# and replace it with 'text' for the purposes of logging?
def output_modifier(string, state):
"""
This function is applied to the model outputs.
"""
global picture_response, params
if not picture_response:
return string
string = remove_surrounded_chars(string)
string = string.replace('"', '')
string = string.replace('', '')
string = string.replace('\n', ' ')
string = string.strip()
if string == '':
string = 'no viable description in reply, try regenerating'
return string
text = ""
if (params['mode'] < 2):
toggle_generation(False)
text = f'*Sends a picture which portrays: “{string}”*'
else:
text = string
string = get_SD_pictures(string, state['character_menu']) + "\n" + text
return string
def bot_prefix_modifier(string):
"""
This function is only applied in chat mode. It modifies
the prefix text for the Bot and can be used to bias its
behavior.
"""
return string
def toggle_generation(*args):
global picture_response, shared
if not args:
picture_response = not picture_response
else:
picture_response = args[0]
shared.processing_message = "*Is sending a picture...*" if picture_response else "*Is typing...*"
def filter_address(address):
address = address.strip()
# address = re.sub('http(s)?:\/\/|\/$','',address) # remove starting http:// OR https:// OR trailing slash
    address = re.sub(r'\/$', '', address)  # remove trailing slash
if not address.startswith('http'):
address = 'http://' + address
return address
def SD_api_address_update(address):
global params
msg = "✔️ SD API is found on:"
address = filter_address(address)
params.update({"address": address})
try:
response = requests.get(url=f'{params["address"]}/sdapi/v1/sd-models')
response.raise_for_status()
# r = response.json()
    except Exception:
msg = "❌ No SD API endpoint on:"
return gr.Textbox.update(label=msg)
def custom_css():
path_to_css = Path(__file__).parent.resolve() / 'style.css'
return open(path_to_css, 'r').read()
def get_checkpoints():
global params
try:
models = requests.get(url=f'{params["address"]}/sdapi/v1/sd-models')
options = requests.get(url=f'{params["address"]}/sdapi/v1/options')
options_json = options.json()
params['sd_checkpoint'] = options_json['sd_model_checkpoint']
params['checkpoint_list'] = [result["title"] for result in models.json()]
    except Exception:
params['sd_checkpoint'] = ""
params['checkpoint_list'] = []
return gr.update(choices=params['checkpoint_list'], value=params['sd_checkpoint'])
def load_checkpoint(checkpoint):
payload = {
"sd_model_checkpoint": checkpoint
}
try:
requests.post(url=f'{params["address"]}/sdapi/v1/options', json=payload)
    except Exception:
pass
def get_samplers():
try:
response = requests.get(url=f'{params["address"]}/sdapi/v1/samplers')
response.raise_for_status()
samplers = [x["name"] for x in response.json()]
    except Exception:
samplers = []
return samplers
def ui():
# Gradio elements
# gr.Markdown('### Stable Diffusion API Pictures') # Currently the name of extension is shown as the title
with gr.Accordion("Parameters", open=True, elem_classes="SDAP"):
with gr.Row():
address = gr.Textbox(placeholder=params['address'], value=params['address'], label='Auto1111\'s WebUI address')
modes_list = ["Manual", "Immersive/Interactive", "Picturebook/Adventure"]
mode = gr.Dropdown(modes_list, value=modes_list[params['mode']], label="Mode of operation", type="index")
with gr.Column(scale=1, min_width=300):
manage_VRAM = gr.Checkbox(value=params['manage_VRAM'], label='Manage VRAM')
save_img = gr.Checkbox(value=params['save_img'], label='Keep original images and use them in chat')
force_pic = gr.Button("Force the picture response")
suppr_pic = gr.Button("Suppress the picture response")
with gr.Row():
checkpoint = gr.Dropdown(params['checkpoint_list'], value=params['sd_checkpoint'], label="Checkpoint", type="value")
update_checkpoints = gr.Button("Get list of checkpoints")
with gr.Accordion("Generation parameters", open=False):
prompt_prefix = gr.Textbox(placeholder=params['prompt_prefix'], value=params['prompt_prefix'], label='Prompt Prefix (best used to describe the look of the character)')
textgen_prefix = gr.Textbox(placeholder=params['textgen_prefix'], value=params['textgen_prefix'], label='textgen prefix (type [subject] where the subject should be placed)')
negative_prompt = gr.Textbox(placeholder=params['negative_prompt'], value=params['negative_prompt'], label='Negative Prompt')
with gr.Row():
with gr.Column():
width = gr.Slider(64, 2048, value=params['width'], step=64, label='Width')
height = gr.Slider(64, 2048, value=params['height'], step=64, label='Height')
with gr.Column(variant="compact", elem_id="sampler_col"):
with gr.Row(elem_id="sampler_row"):
sampler_name = gr.Dropdown(value=params['sampler_name'], allow_custom_value=True, label='Sampling method', elem_id="sampler_box")
create_refresh_button(sampler_name, lambda: None, lambda: {'choices': get_samplers()}, 'refresh-button')
steps = gr.Slider(1, 150, value=params['steps'], step=1, label="Sampling steps", elem_id="steps_box")
with gr.Row():
seed = gr.Number(label="Seed", value=params['seed'], elem_id="seed_box")
cfg_scale = gr.Number(label="CFG Scale", value=params['cfg_scale'], elem_id="cfg_box")
with gr.Column() as hr_options:
restore_faces = gr.Checkbox(value=params['restore_faces'], label='Restore faces')
enable_hr = gr.Checkbox(value=params['enable_hr'], label='Hires. fix')
with gr.Row(visible=params['enable_hr'], elem_classes="hires_opts") as hr_options:
hr_scale = gr.Slider(1, 4, value=params['hr_scale'], step=0.1, label='Upscale by')
denoising_strength = gr.Slider(0, 1, value=params['denoising_strength'], step=0.01, label='Denoising strength')
hr_upscaler = gr.Textbox(placeholder=params['hr_upscaler'], value=params['hr_upscaler'], label='Upscaler')
# Event functions to update the parameters in the backend
address.change(lambda x: params.update({"address": filter_address(x)}), address, None)
mode.select(lambda x: params.update({"mode": x}), mode, None)
mode.select(lambda x: toggle_generation(x > 1), inputs=mode, outputs=None)
manage_VRAM.change(lambda x: params.update({"manage_VRAM": x}), manage_VRAM, None)
manage_VRAM.change(lambda x: give_VRAM_priority('set' if x else 'reset'), inputs=manage_VRAM, outputs=None)
save_img.change(lambda x: params.update({"save_img": x}), save_img, None)
address.submit(fn=SD_api_address_update, inputs=address, outputs=address)
prompt_prefix.change(lambda x: params.update({"prompt_prefix": x}), prompt_prefix, None)
textgen_prefix.change(lambda x: params.update({"textgen_prefix": x}), textgen_prefix, None)
negative_prompt.change(lambda x: params.update({"negative_prompt": x}), negative_prompt, None)
width.change(lambda x: params.update({"width": x}), width, None)
height.change(lambda x: params.update({"height": x}), height, None)
hr_scale.change(lambda x: params.update({"hr_scale": x}), hr_scale, None)
denoising_strength.change(lambda x: params.update({"denoising_strength": x}), denoising_strength, None)
restore_faces.change(lambda x: params.update({"restore_faces": x}), restore_faces, None)
hr_upscaler.change(lambda x: params.update({"hr_upscaler": x}), hr_upscaler, None)
enable_hr.change(lambda x: params.update({"enable_hr": x}), enable_hr, None)
enable_hr.change(lambda x: hr_options.update(visible=params["enable_hr"]), enable_hr, hr_options)
update_checkpoints.click(get_checkpoints, None, checkpoint)
checkpoint.change(lambda x: params.update({"sd_checkpoint": x}), checkpoint, None)
checkpoint.change(load_checkpoint, checkpoint, None)
sampler_name.change(lambda x: params.update({"sampler_name": x}), sampler_name, None)
steps.change(lambda x: params.update({"steps": x}), steps, None)
seed.change(lambda x: params.update({"seed": x}), seed, None)
cfg_scale.change(lambda x: params.update({"cfg_scale": x}), cfg_scale, None)
force_pic.click(lambda x: toggle_generation(True), inputs=force_pic, outputs=None)
suppr_pic.click(lambda x: toggle_generation(False), inputs=suppr_pic, outputs=None)
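A quick sketch of how the trigger detection behaves (the inputs are assumptions):

```python
# Hypothetical examples for triggers_are_in():
print(triggers_are_in('send me a picture of your room'))  # True
print(triggers_are_in('*whispers* send me a selfie'))     # True (asterisk text is stripped first)
print(triggers_are_in('tell me about your day'))          # False (no image word)
```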

@ -0,0 +1,52 @@
/* Align the elements for SD_api_picture extension */
.SDAP #sampler_box {
padding-top: var(--spacing-sm);
padding-bottom: var(--spacing-sm);
border: 0;
}
.SDAP #steps_box {
border-radius: 0 0 var(--block-radius) var(--block-radius);
}
.SDAP #sampler_col {
gap: 0;
padding: 0;
background-color: transparent;
}
.SDAP #sampler_row {
border-bottom: 0;
box-shadow: var(--block-shadow);
border-width: var(--block-border-width);
border-color: var(--block-border-color);
border-radius: var(--block-radius) var(--block-radius) 0 0;
background: var(--block-background-fill);
gap: 0;
}
.SDAP #sampler_row .refresh-button {
margin-bottom: var(--spacing-sm);
margin-right: var(--spacing-lg);
}
.SDAP #seed_box,
.SDAP #cfg_box {
padding-top: var(--spacing-md);
}
.SDAP #sampler_box span,
.SDAP #seed_box span,
.SDAP #cfg_box span,
.SDAP #steps_box span {
margin-bottom: var(--spacing-sm);
}
.SDAP svg.dropdown-arrow {
flex-shrink: 0 !important;
margin: 0px !important;
}
.SDAP .hires_opts input[type="number"] {
width: 6em !important;
}

@ -0,0 +1,58 @@
import base64
from io import BytesIO
import gradio as gr
import torch
from transformers import BlipForConditionalGeneration, BlipProcessor
from modules import chat, shared, ui_chat
from modules.ui import gather_interface_values
from modules.utils import gradio
input_hijack = {
'state': False,
'value': ["", ""]
}
processor = BlipProcessor.from_pretrained("Salesforce/blip-image-captioning-base")
model = BlipForConditionalGeneration.from_pretrained("Salesforce/blip-image-captioning-base", torch_dtype=torch.float32).to("cpu")
def chat_input_modifier(text, visible_text, state):
global input_hijack
if input_hijack['state']:
input_hijack['state'] = False
return input_hijack['value']
else:
return text, visible_text
def caption_image(raw_image):
inputs = processor(raw_image.convert('RGB'), return_tensors="pt").to("cpu", torch.float32)
out = model.generate(**inputs, max_new_tokens=100)
return processor.decode(out[0], skip_special_tokens=True)
def generate_chat_picture(picture, name1, name2):
text = f'*{name1} sends {name2} a picture that contains the following: “{caption_image(picture)}”*'
# lower the resolution of sent images for the chat, otherwise the log size gets out of control quickly with all the base64 values in visible history
picture.thumbnail((300, 300))
buffer = BytesIO()
picture.save(buffer, format="JPEG")
img_str = base64.b64encode(buffer.getvalue()).decode('utf-8')
visible_text = f'<img src="data:image/jpeg;base64,{img_str}" alt="{text}">'
return text, visible_text
def ui():
picture_select = gr.Image(label='Send a picture', type='pil')
# Prepare the input hijack, update the interface values, call the generation function, and clear the picture
picture_select.upload(
lambda picture, name1, name2: input_hijack.update({
"state": True,
"value": generate_chat_picture(picture, name1, name2)
}), [picture_select, shared.gradio['name1'], shared.gradio['name2']], None).then(
gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
chat.generate_chat_reply_wrapper, gradio(ui_chat.inputs), gradio('display', 'history'), show_progress=False).then(
lambda: None, None, picture_select, show_progress=False)
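A direct usage sketch for the captioner (the image path is an assumption):

```python
# Hypothetical usage: caption a local image with the BLIP model above.
from PIL import Image

img = Image.open('example.jpg')  # any local image
print(caption_image(img))        # e.g. "a dog sitting on a couch"
```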
