fix: Refactor OpenAI API

Former-commit-id: a044ef733eef9097069b5697669aaebed435ce10
pull/133/head
Zack 1 year ago
parent 71133ebdaa
commit 374efe3411

@@ -1,6 +1,8 @@
from swarms.models import OpenAIChat
from swarms.structs import Flow
from langchain.schema.messages import ChatMessage
message = ChatMessage(role="user", content="Translate the following English text to French: Hello World")
api_key = ""
# Initialize the language model; it can be swapped out for other providers such as Anthropic or Hugging Face models like Mistral (see the sketch after this file's diff)
@@ -8,6 +10,7 @@ llm = OpenAIChat(
# model_name="gpt-4"
openai_api_key=api_key,
temperature=0.5,
message=message,
# max_tokens=100,
)
@@ -15,7 +18,7 @@ llm = OpenAIChat(
## Initialize the workflow
flow = Flow(
llm=llm,
max_loops=5,
max_loops=2,
dashboard=True,
# tools = [search_api, slack, ]
# stopping_condition=None, # You can define a stopping condition as needed.
@@ -29,9 +32,7 @@ flow = Flow(
# out = flow.load_state("flow_state.json")
# temp = flow.dynamic_temperature()
# filter = flow.add_response_filter("Trump")
out = flow.run(
"Generate a 10,000 word blog on mental clarity and the benefits of meditation."
)
out = flow.run(message)
# out = flow.validate_response(out)
# out = flow.analyze_feedback(out)
# out = flow.print_history_and_memory()
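A minimal sketch of the swap suggested by the comment above, assuming Anthropic in swarms.models accepts an API key and temperature the way OpenAIChat does and that Flow.run accepts a ChatMessage as introduced in this commit; the values are placeholders.

from swarms.models import Anthropic  # assumed drop-in alternative to OpenAIChat
from swarms.structs import Flow
from langchain.schema.messages import ChatMessage

message = ChatMessage(role="user", content="Translate the following English text to French: Hello World")

# Assumption: Anthropic takes an API key and temperature like OpenAIChat does.
llm = Anthropic(anthropic_api_key="", temperature=0.5)

# Same Flow wiring as above; run() is given the ChatMessage introduced in this commit.
flow = Flow(llm=llm, max_loops=2, dashboard=True)
out = flow.run(message)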

@@ -1,13 +1,14 @@
import sys
log_file = open("errors.txt", "w")
sys.stderr = log_file
# log_file = open("errors.txt", "w")
# sys.stderr = log_file
# LLMs
from swarms.models.anthropic import Anthropic
from swarms.models.petals import Petals
from swarms.models.mistral import Mistral
from swarms.models.openai_models import OpenAI, AzureOpenAI, OpenAIChat
from swarms.models.openai_models import OpenAIChat, AzureOpenAI, OpenAI
from swarms.models.zephyr import Zephyr
from swarms.models.biogpt import BioGPT
from swarms.models.huggingface import HuggingfaceLLM

@@ -1,6 +1,6 @@
"""OpenAI chat wrapper."""
from __future__ import annotations
"""OpenAI chat wrapper."""
import logging
import os
import sys
@@ -72,7 +72,7 @@ def _import_tiktoken() -> Any:
def _create_retry_decorator(
llm: ChatOpenAI,
llm: OpenAIChat,
run_manager: Optional[
Union[AsyncCallbackManagerForLLMRun, CallbackManagerForLLMRun]
] = None,
@@ -92,7 +92,7 @@ def _create_retry_decorator(
async def acompletion_with_retry(
llm: ChatOpenAI,
llm: OpenAIChat,
run_manager: Optional[AsyncCallbackManagerForLLMRun] = None,
**kwargs: Any,
) -> Any:
@@ -139,8 +139,91 @@ def _convert_delta_to_message_chunk(
else:
return default_class(content=content)
class OpenAI(BaseChatModel):
"""OpenAI large language models.
To use, you should have the ``openai`` python package installed, and the
environment variable ``OPENAI_API_KEY`` set with your API key.
Any parameters that are valid to be passed to the openai.create call can be passed
in, even if not explicitly saved on this class.
Example:
.. code-block:: python
from swarms.models import OpenAI
openai = OpenAI(model_name="text-davinci-003")
openai("What is the report on the 2022 oympian games?")
"""
@property
def _invocation_params(self) -> Dict[str, Any]:
return {**{"model": self.model_name}, **super()._invocation_params}
class AzureOpenAI(BaseChatModel):
"""Azure-specific OpenAI large language models.
To use, you should have the ``openai`` python package installed, and the
environment variable ``OPENAI_API_KEY`` set with your API key.
Any parameters that are valid to be passed to the openai.create call can be passed
in, even if not explicitly saved on this class.
Example:
.. code-block:: python
from swarms.models import AzureOpenAI
openai = AzureOpenAI(model_name="text-davinci-003")
"""
deployment_name: str = ""
"""Deployment name to use."""
openai_api_type: str = ""
openai_api_version: str = ""
@root_validator()
def validate_azure_settings(cls, values: Dict) -> Dict:
values["openai_api_version"] = get_from_dict_or_env(
values,
"openai_api_version",
"OPENAI_API_VERSION",
)
values["openai_api_type"] = get_from_dict_or_env(
values, "openai_api_type", "OPENAI_API_TYPE", "azure"
)
return values
@property
def _identifying_params(self) -> Mapping[str, Any]:
return {
**{"deployment_name": self.deployment_name},
**super()._identifying_params,
}
@property
def _invocation_params(self) -> Dict[str, Any]:
openai_params = {
"engine": self.deployment_name,
"api_type": self.openai_api_type,
"api_version": self.openai_api_version,
}
return {**openai_params, **super()._invocation_params}
@property
def _llm_type(self) -> str:
"""Return type of llm."""
return "azure"
@property
def lc_attributes(self) -> Dict[str, Any]:
return {
"openai_api_type": self.openai_api_type,
"openai_api_version": self.openai_api_version,
}
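A comparable sketch for the Azure variant, assuming it is invoked the same way as the OpenAI example above: deployment_name is passed to the constructor, while OPENAI_API_VERSION and OPENAI_API_TYPE are picked up from the environment by validate_azure_settings. The key, version, and deployment name are placeholders, and the Azure endpoint configuration is not shown in this hunk.

import os
from swarms.models import AzureOpenAI

os.environ["OPENAI_API_KEY"] = "your-azure-key"  # placeholder
os.environ["OPENAI_API_VERSION"] = "2023-05-15"  # read by validate_azure_settings
os.environ["OPENAI_API_TYPE"] = "azure"          # defaults to "azure" when unset

# deployment_name is mapped to the "engine" field in _invocation_params.
llm = AzureOpenAI(deployment_name="my-gpt-35-deployment")  # placeholder deployment
print(llm("What is the capital of France?"))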
class ChatOpenAI(BaseChatModel):
class OpenAIChat(BaseChatModel):
"""`OpenAI` Chat large language models API.
To use, you should have the ``openai`` python package installed, and the

@@ -517,7 +517,7 @@ class Flow:
def _run(self, **kwargs: Any) -> str:
"""Generate a result using the provided keyword args."""
task = self.format_prompt(**kwargs)
response, history = self._generate(task, task)
response, history = self.llm._generate([task, task])
logging.info(f"Message history: {history}")
return response

@@ -1,188 +0,0 @@
import asyncio
import os
from contextlib import contextmanager
from typing import Optional
import pandas as pd
import torch
from langchain.agents import tool
from langchain.agents.agent_toolkits.pandas.base import create_pandas_dataframe_agent
from langchain.chains.qa_with_sources.loading import (
BaseCombineDocumentsChain,
)
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.tools import BaseTool
from PIL import Image
from pydantic import Field
from transformers import (
BlipForQuestionAnswering,
BlipProcessor,
)
from swarms.utils.logger import logger
ROOT_DIR = "./data/"
@contextmanager
def pushd(new_dir):
"""Context manager for changing the current working directory."""
prev_dir = os.getcwd()
os.chdir(new_dir)
try:
yield
finally:
os.chdir(prev_dir)
@tool
def process_csv(
llm, csv_file_path: str, instructions: str, output_path: Optional[str] = None
) -> str:
"""Process a CSV by with pandas in a limited REPL.\
Only use this after writing data to disk as a csv file.\
Any figures must be saved to disk to be viewed by the human.\
Instructions should be written in natural language, not code. Assume the dataframe is already loaded."""
with pushd(ROOT_DIR):
try:
df = pd.read_csv(csv_file_path)
except Exception as e:
return f"Error: {e}"
agent = create_pandas_dataframe_agent(llm, df, max_iterations=30, verbose=False)
if output_path is not None:
instructions += f" Save output to disk at {output_path}"
try:
result = agent.run(instructions)
return result
except Exception as e:
return f"Error: {e}"
async def async_load_playwright(url: str) -> str:
"""Load the specified URLs using Playwright and parse using BeautifulSoup."""
from bs4 import BeautifulSoup
from playwright.async_api import async_playwright
results = ""
async with async_playwright() as p:
browser = await p.chromium.launch(headless=True)
try:
page = await browser.new_page()
await page.goto(url)
page_source = await page.content()
soup = BeautifulSoup(page_source, "html.parser")
for script in soup(["script", "style"]):
script.extract()
text = soup.get_text()
lines = (line.strip() for line in text.splitlines())
chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
results = "\n".join(chunk for chunk in chunks if chunk)
except Exception as e:
results = f"Error: {e}"
await browser.close()
return results
def run_async(coro):
event_loop = asyncio.get_event_loop()
return event_loop.run_until_complete(coro)
@tool
def browse_web_page(url: str) -> str:
"""Verbose way to scrape a whole webpage. Likely to cause issues parsing."""
return run_async(async_load_playwright(url))
def _get_text_splitter():
return RecursiveCharacterTextSplitter(
# Set a really small chunk size, just to show.
chunk_size=500,
chunk_overlap=20,
length_function=len,
)
class WebpageQATool(BaseTool):
name = "query_webpage"
description = (
"Browse a webpage and retrieve the information relevant to the question."
)
text_splitter: RecursiveCharacterTextSplitter = Field(
default_factory=_get_text_splitter
)
qa_chain: BaseCombineDocumentsChain
def _run(self, url: str, question: str) -> str:
"""Useful for browsing websites and scraping the text information."""
result = browse_web_page.run(url)
docs = [Document(page_content=result, metadata={"source": url})]
web_docs = self.text_splitter.split_documents(docs)
results = []
# TODO: Handle this with a MapReduceChain
for i in range(0, len(web_docs), 4):
input_docs = web_docs[i : i + 4]
window_result = self.qa_chain(
{"input_documents": input_docs, "question": question},
return_only_outputs=True,
)
results.append(f"Response from window {i} - {window_result}")
results_docs = [
Document(page_content="\n".join(results), metadata={"source": url})
]
return self.qa_chain(
{"input_documents": results_docs, "question": question},
return_only_outputs=True,
)
async def _arun(self, url: str, question: str) -> str:
raise NotImplementedError
class EdgeGPTTool:
# Initialize the custom tool
def __init__(
self,
model,
name="EdgeGPTTool",
description="Tool that uses EdgeGPTModel to generate responses",
):
super().__init__(name=name, description=description)
self.model = model
def _run(self, prompt):
return self.model.__call__(prompt)
@tool
def VQAinference(self, inputs):
"""
Answer Question About The Image, VQA Multi-Modal Worker agent
description="useful when you need an answer for a question based on an image. "
"like: what is the background color of the last image, how many cats in this figure, what is in this figure. "
"The input to this tool should be a comma separated string of two, representing the image_path and the question",
"""
device = "cuda:0"
torch_dtype = torch.float16 if "cuda" in device else torch.float32
processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
model = BlipForQuestionAnswering.from_pretrained(
"Salesforce/blip-vqa-base", torch_dtype=torch_dtype
).to(device)
image_path, question = inputs.split(",")
raw_image = Image.open(image_path).convert("RGB")
inputs = processor(raw_image, question, return_tensors="pt").to(device, torch_dtype)
out = model.generate(**inputs)
answer = processor.decode(out[0], skip_special_tokens=True)
logger.debug(
f"\nProcessed VisualQuestionAnswering, Input Image: {image_path}, Input"
f" Question: {question}, Output Answer: {answer}"
)
return answer

@@ -1,271 +0,0 @@
import os
import uuid
import numpy as np
import torch
from diffusers import (
EulerAncestralDiscreteScheduler,
StableDiffusionInpaintPipeline,
StableDiffusionInstructPix2PixPipeline,
StableDiffusionPipeline,
)
from PIL import Image
from transformers import (
BlipForConditionalGeneration,
BlipForQuestionAnswering,
BlipProcessor,
CLIPSegForImageSegmentation,
CLIPSegProcessor,
)
from swarms.prompts.prebuild.multi_modal_prompts import IMAGE_PROMPT
from swarms.tools.tool import tool
from swarms.utils.logger import logger
from swarms.utils.main import BaseHandler, get_new_image_name
class MaskFormer:
def __init__(self, device):
print("Initializing MaskFormer to %s" % device)
self.device = device
self.processor = CLIPSegProcessor.from_pretrained("CIDAS/clipseg-rd64-refined")
self.model = CLIPSegForImageSegmentation.from_pretrained(
"CIDAS/clipseg-rd64-refined"
).to(device)
def inference(self, image_path, text):
threshold = 0.5
min_area = 0.02
padding = 20
original_image = Image.open(image_path)
image = original_image.resize((512, 512))
inputs = self.processor(
text=text, images=image, padding="max_length", return_tensors="pt"
).to(self.device)
with torch.no_grad():
outputs = self.model(**inputs)
mask = torch.sigmoid(outputs[0]).squeeze().cpu().numpy() > threshold
area_ratio = len(np.argwhere(mask)) / (mask.shape[0] * mask.shape[1])
if area_ratio < min_area:
return None
true_indices = np.argwhere(mask)
mask_array = np.zeros_like(mask, dtype=bool)
for idx in true_indices:
padded_slice = tuple(
slice(max(0, i - padding), i + padding + 1) for i in idx
)
mask_array[padded_slice] = True
visual_mask = (mask_array * 255).astype(np.uint8)
image_mask = Image.fromarray(visual_mask)
return image_mask.resize(original_image.size)
class ImageEditing:
def __init__(self, device):
print("Initializing ImageEditing to %s" % device)
self.device = device
self.mask_former = MaskFormer(device=self.device)
self.revision = "fp16" if "cuda" in device else None
self.torch_dtype = torch.float16 if "cuda" in device else torch.float32
self.inpaint = StableDiffusionInpaintPipeline.from_pretrained(
"runwayml/stable-diffusion-inpainting",
revision=self.revision,
torch_dtype=self.torch_dtype,
).to(device)
@tool(
name="Remove Something From The Photo",
description=(
"useful when you want to remove and object or something from the photo "
"from its description or location. "
"The input to this tool should be a comma separated string of two, "
"representing the image_path and the object need to be removed. "
),
)
def inference_remove(self, inputs):
image_path, to_be_removed_txt = inputs.split(",")
return self.inference_replace(f"{image_path},{to_be_removed_txt},background")
@tool(
name="Replace Something From The Photo",
description=(
"useful when you want to replace an object from the object description or"
" location with another object from its description. The input to this tool"
" should be a comma separated string of three, representing the image_path,"
" the object to be replaced, the object to be replaced with "
),
)
def inference_replace(self, inputs):
image_path, to_be_replaced_txt, replace_with_txt = inputs.split(",")
original_image = Image.open(image_path)
original_size = original_image.size
mask_image = self.mask_former.inference(image_path, to_be_replaced_txt)
updated_image = self.inpaint(
prompt=replace_with_txt,
image=original_image.resize((512, 512)),
mask_image=mask_image.resize((512, 512)),
).images[0]
updated_image_path = get_new_image_name(
image_path, func_name="replace-something"
)
updated_image = updated_image.resize(original_size)
updated_image.save(updated_image_path)
logger.debug(
f"\nProcessed ImageEditing, Input Image: {image_path}, Replace"
f" {to_be_replaced_txt} to {replace_with_txt}, Output Image:"
f" {updated_image_path}"
)
return updated_image_path
class InstructPix2Pix:
def __init__(self, device):
print("Initializing InstructPix2Pix to %s" % device)
self.device = device
self.torch_dtype = torch.float16 if "cuda" in device else torch.float32
self.pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(
"timbrooks/instruct-pix2pix",
safety_checker=None,
torch_dtype=self.torch_dtype,
).to(device)
self.pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(
self.pipe.scheduler.config
)
@tool(
name="Instruct Image Using Text",
description=(
"useful when you want to the style of the image to be like the text. "
"like: make it look like a painting. or make it like a robot. "
"The input to this tool should be a comma separated string of two, "
"representing the image_path and the text. "
),
)
def inference(self, inputs):
"""Change style of image."""
logger.debug("===> Starting InstructPix2Pix Inference")
image_path, text = inputs.split(",")[0], ",".join(inputs.split(",")[1:])
original_image = Image.open(image_path)
image = self.pipe(
text, image=original_image, num_inference_steps=40, image_guidance_scale=1.2
).images[0]
updated_image_path = get_new_image_name(image_path, func_name="pix2pix")
image.save(updated_image_path)
logger.debug(
f"\nProcessed InstructPix2Pix, Input Image: {image_path}, Instruct Text:"
f" {text}, Output Image: {updated_image_path}"
)
return updated_image_path
class Text2Image:
def __init__(self, device):
print("Initializing Text2Image to %s" % device)
self.device = device
self.torch_dtype = torch.float16 if "cuda" in device else torch.float32
self.pipe = StableDiffusionPipeline.from_pretrained(
"runwayml/stable-diffusion-v1-5", torch_dtype=self.torch_dtype
)
self.pipe.to(device)
self.a_prompt = "best quality, extremely detailed"
self.n_prompt = (
"longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, "
"fewer digits, cropped, worst quality, low quality"
)
@tool(
name="Generate Image From User Input Text",
description=(
"useful when you want to generate an image from a user input text and save"
" it to a file. like: generate an image of an object or something, or"
" generate an image that includes some objects. The input to this tool"
" should be a string, representing the text used to generate image. "
),
)
def inference(self, text):
image_filename = os.path.join("image", str(uuid.uuid4())[0:8] + ".png")
prompt = text + ", " + self.a_prompt
image = self.pipe(prompt, negative_prompt=self.n_prompt).images[0]
image.save(image_filename)
logger.debug(
f"\nProcessed Text2Image, Input Text: {text}, Output Image:"
f" {image_filename}"
)
return image_filename
class VisualQuestionAnswering:
def __init__(self, device):
print("Initializing VisualQuestionAnswering to %s" % device)
self.torch_dtype = torch.float16 if "cuda" in device else torch.float32
self.device = device
self.processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
self.model = BlipForQuestionAnswering.from_pretrained(
"Salesforce/blip-vqa-base", torch_dtype=self.torch_dtype
).to(self.device)
@tool(
name="Answer Question About The Image",
description=(
"useful when you need an answer for a question based on an image. like:"
" what is the background color of the last image, how many cats in this"
" figure, what is in this figure. The input to this tool should be a comma"
" separated string of two, representing the image_path and the question"
),
)
def inference(self, inputs):
image_path, question = inputs.split(",")
raw_image = Image.open(image_path).convert("RGB")
inputs = self.processor(raw_image, question, return_tensors="pt").to(
self.device, self.torch_dtype
)
out = self.model.generate(**inputs)
answer = self.processor.decode(out[0], skip_special_tokens=True)
logger.debug(
f"\nProcessed VisualQuestionAnswering, Input Image: {image_path}, Input"
f" Question: {question}, Output Answer: {answer}"
)
return answer
class ImageCaptioning(BaseHandler):
def __init__(self, device):
print("Initializing ImageCaptioning to %s" % device)
self.device = device
self.torch_dtype = torch.float16 if "cuda" in device else torch.float32
self.processor = BlipProcessor.from_pretrained(
"Salesforce/blip-image-captioning-base"
)
self.model = BlipForConditionalGeneration.from_pretrained(
"Salesforce/blip-image-captioning-base", torch_dtype=self.torch_dtype
).to(self.device)
def handle(self, filename: str):
img = Image.open(filename)
width, height = img.size
ratio = min(512 / width, 512 / height)
width_new, height_new = (round(width * ratio), round(height * ratio))
img = img.resize((width_new, height_new))
img = img.convert("RGB")
img.save(filename, "PNG")
print(f"Resize image form {width}x{height} to {width_new}x{height_new}")
inputs = self.processor(Image.open(filename), return_tensors="pt").to(
self.device, self.torch_dtype
)
out = self.model.generate(**inputs)
description = self.processor.decode(out[0], skip_special_tokens=True)
print(
f"\nProcessed ImageCaptioning, Input Image: {filename}, Output Text:"
f" {description}"
)
return IMAGE_PROMPT.format(filename=filename, description=description)