parent 71133ebdaa
commit 374efe3411
Binary file not shown.
@@ -1,188 +0,0 @@
import asyncio
import os
from contextlib import contextmanager
from typing import Optional

import pandas as pd
import torch
from langchain.agents import tool
from langchain.agents.agent_toolkits.pandas.base import create_pandas_dataframe_agent
from langchain.chains.qa_with_sources.loading import (
    BaseCombineDocumentsChain,
)
from langchain.docstore.document import Document
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.tools import BaseTool
from PIL import Image
from pydantic import Field
from transformers import (
    BlipForQuestionAnswering,
    BlipProcessor,
)

from swarms.utils.logger import logger

ROOT_DIR = "./data/"

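# pushd temporarily switches the process working directory so tool I/O stays inside
# ROOT_DIR, then restores the previous directory on exit.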
@contextmanager
def pushd(new_dir):
    """Context manager for changing the current working directory."""
    prev_dir = os.getcwd()
    os.chdir(new_dir)
    try:
        yield
    finally:
        os.chdir(prev_dir)

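# process_csv loads the CSV inside ROOT_DIR, hands the dataframe to a LangChain
# pandas-dataframe agent, and runs the natural-language instructions against it
# (optionally asking the agent to save its output to `output_path`).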
@tool
def process_csv(
    llm, csv_file_path: str, instructions: str, output_path: Optional[str] = None
) -> str:
    """Process a CSV with pandas in a limited REPL.\
 Only use this after writing data to disk as a csv file.\
 Any figures must be saved to disk to be viewed by the human.\
 Instructions should be written in natural language, not code. Assume the dataframe is already loaded."""
    with pushd(ROOT_DIR):
        try:
            df = pd.read_csv(csv_file_path)
        except Exception as e:
            return f"Error: {e}"
        agent = create_pandas_dataframe_agent(llm, df, max_iterations=30, verbose=False)
        if output_path is not None:
            instructions += f" Save output to disk at {output_path}"
        try:
            result = agent.run(instructions)
            return result
        except Exception as e:
            return f"Error: {e}"

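# async_load_playwright drives headless Chromium via Playwright, strips <script>/<style>
# tags with BeautifulSoup, and collapses the remaining visible text into newline-separated chunks.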
async def async_load_playwright(url: str) -> str:
    """Load the specified URL using Playwright and parse it with BeautifulSoup."""
    from bs4 import BeautifulSoup
    from playwright.async_api import async_playwright

    results = ""
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            page = await browser.new_page()
            await page.goto(url)

            page_source = await page.content()
            soup = BeautifulSoup(page_source, "html.parser")

            for script in soup(["script", "style"]):
                script.extract()

            text = soup.get_text()
            lines = (line.strip() for line in text.splitlines())
            chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
            results = "\n".join(chunk for chunk in chunks if chunk)
        except Exception as e:
            results = f"Error: {e}"
        await browser.close()
    return results

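# run_async blocks on the current event loop until the coroutine finishes,
# bridging the async scraper to the synchronous @tool interface.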
def run_async(coro):
    event_loop = asyncio.get_event_loop()
    return event_loop.run_until_complete(coro)

@tool
def browse_web_page(url: str) -> str:
    """Verbose way to scrape a whole webpage. Likely to cause issues parsing."""
    return run_async(async_load_playwright(url))

def _get_text_splitter():
    return RecursiveCharacterTextSplitter(
        # Set a really small chunk size, just to show.
        chunk_size=500,
        chunk_overlap=20,
        length_function=len,
    )

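# WebpageQATool scrapes a page, splits it into 500-character chunks, answers the question
# over windows of four chunks at a time, then runs the QA chain once more over the combined
# window answers. Example wiring (hypothetical `llm`, not defined in this file):
#   from langchain.chains.qa_with_sources.loading import load_qa_with_sources_chain
#   query_website_tool = WebpageQATool(qa_chain=load_qa_with_sources_chain(llm))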
class WebpageQATool(BaseTool):
    name = "query_webpage"
    description = (
        "Browse a webpage and retrieve the information relevant to the question."
    )
    text_splitter: RecursiveCharacterTextSplitter = Field(
        default_factory=_get_text_splitter
    )
    qa_chain: BaseCombineDocumentsChain

    def _run(self, url: str, question: str) -> str:
        """Useful for browsing websites and scraping the text information."""
        result = browse_web_page.run(url)
        docs = [Document(page_content=result, metadata={"source": url})]
        web_docs = self.text_splitter.split_documents(docs)
        results = []
        # TODO: Handle this with a MapReduceChain
        for i in range(0, len(web_docs), 4):
            input_docs = web_docs[i : i + 4]
            window_result = self.qa_chain(
                {"input_documents": input_docs, "question": question},
                return_only_outputs=True,
            )
            results.append(f"Response from window {i} - {window_result}")
        results_docs = [
            Document(page_content="\n".join(results), metadata={"source": url})
        ]
        return self.qa_chain(
            {"input_documents": results_docs, "question": question},
            return_only_outputs=True,
        )

    async def _arun(self, url: str, question: str) -> str:
        raise NotImplementedError

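# EdgeGPTTool is a thin wrapper that stores a model instance and proxies prompts to it via _run.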
class EdgeGPTTool:
    # Initialize the custom tool
    def __init__(
        self,
        model,
        name="EdgeGPTTool",
        description="Tool that uses EdgeGPTModel to generate responses",
    ):
        # The class has no base class that accepts these kwargs, so store them directly.
        self.name = name
        self.description = description
        self.model = model

    def _run(self, prompt):
        return self.model.__call__(prompt)

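# VQAinference loads the BLIP VQA checkpoint on every call and expects a single
# "image_path,question" string; the hard-coded device assumes a CUDA GPU is available.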
@tool
def VQAinference(inputs):
    """
    Answer Question About The Image, VQA Multi-Modal Worker agent
    description="useful when you need an answer for a question based on an image. "
    "like: what is the background color of the last image, how many cats in this figure, what is in this figure. "
    "The input to this tool should be a comma separated string of two, representing the image_path and the question",

    """
    device = "cuda:0"
    torch_dtype = torch.float16 if "cuda" in device else torch.float32
    processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
    model = BlipForQuestionAnswering.from_pretrained(
        "Salesforce/blip-vqa-base", torch_dtype=torch_dtype
    ).to(device)

    image_path, question = inputs.split(",")
    raw_image = Image.open(image_path).convert("RGB")
    inputs = processor(raw_image, question, return_tensors="pt").to(device, torch_dtype)
    out = model.generate(**inputs)
    answer = processor.decode(out[0], skip_special_tokens=True)

    logger.debug(
        f"\nProcessed VisualQuestionAnswering, Input Image: {image_path}, Input"
        f" Question: {question}, Output Answer: {answer}"
    )

    return answer
@@ -1,271 +0,0 @@
import os
import uuid

import numpy as np
import torch
from diffusers import (
    EulerAncestralDiscreteScheduler,
    StableDiffusionInpaintPipeline,
    StableDiffusionInstructPix2PixPipeline,
    StableDiffusionPipeline,
)
from PIL import Image
from transformers import (
    BlipForConditionalGeneration,
    BlipForQuestionAnswering,
    BlipProcessor,
    CLIPSegForImageSegmentation,
    CLIPSegProcessor,
)

from swarms.prompts.prebuild.multi_modal_prompts import IMAGE_PROMPT
from swarms.tools.tool import tool
from swarms.utils.logger import logger
from swarms.utils.main import BaseHandler, get_new_image_name

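# MaskFormer runs CLIPSeg on a text prompt to produce a binary mask of the described region,
# pads it by 20 px, and returns it as a PIL image (or None if the region covers <2% of the image).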
class MaskFormer:
    def __init__(self, device):
        print("Initializing MaskFormer to %s" % device)
        self.device = device
        self.processor = CLIPSegProcessor.from_pretrained("CIDAS/clipseg-rd64-refined")
        self.model = CLIPSegForImageSegmentation.from_pretrained(
            "CIDAS/clipseg-rd64-refined"
        ).to(device)

    def inference(self, image_path, text):
        threshold = 0.5
        min_area = 0.02
        padding = 20
        original_image = Image.open(image_path)
        image = original_image.resize((512, 512))
        inputs = self.processor(
            text=text, images=image, padding="max_length", return_tensors="pt"
        ).to(self.device)
        with torch.no_grad():
            outputs = self.model(**inputs)
        mask = torch.sigmoid(outputs[0]).squeeze().cpu().numpy() > threshold
        area_ratio = len(np.argwhere(mask)) / (mask.shape[0] * mask.shape[1])
        if area_ratio < min_area:
            return None
        true_indices = np.argwhere(mask)
        mask_array = np.zeros_like(mask, dtype=bool)
        for idx in true_indices:
            padded_slice = tuple(
                slice(max(0, i - padding), i + padding + 1) for i in idx
            )
            mask_array[padded_slice] = True
        visual_mask = (mask_array * 255).astype(np.uint8)
        image_mask = Image.fromarray(visual_mask)
        return image_mask.resize(original_image.size)

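# ImageEditing combines CLIPSeg masking with Stable Diffusion inpainting; "remove" is
# implemented as "replace the masked object with background".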
class ImageEditing:
    def __init__(self, device):
        print("Initializing ImageEditing to %s" % device)
        self.device = device
        self.mask_former = MaskFormer(device=self.device)
        self.revision = "fp16" if "cuda" in device else None
        self.torch_dtype = torch.float16 if "cuda" in device else torch.float32
        self.inpaint = StableDiffusionInpaintPipeline.from_pretrained(
            "runwayml/stable-diffusion-inpainting",
            revision=self.revision,
            torch_dtype=self.torch_dtype,
        ).to(device)

    @tool(
        name="Remove Something From The Photo",
        description=(
            "useful when you want to remove an object or something from the photo "
            "from its description or location. "
            "The input to this tool should be a comma separated string of two, "
            "representing the image_path and the object to be removed. "
        ),
    )
    def inference_remove(self, inputs):
        image_path, to_be_removed_txt = inputs.split(",")
        return self.inference_replace(f"{image_path},{to_be_removed_txt},background")

    @tool(
        name="Replace Something From The Photo",
        description=(
            "useful when you want to replace an object from the object description or"
            " location with another object from its description. The input to this tool"
            " should be a comma separated string of three, representing the image_path,"
            " the object to be replaced, the object to be replaced with "
        ),
    )
    def inference_replace(self, inputs):
        image_path, to_be_replaced_txt, replace_with_txt = inputs.split(",")
        original_image = Image.open(image_path)
        original_size = original_image.size
        mask_image = self.mask_former.inference(image_path, to_be_replaced_txt)
        updated_image = self.inpaint(
            prompt=replace_with_txt,
            image=original_image.resize((512, 512)),
            mask_image=mask_image.resize((512, 512)),
        ).images[0]
        updated_image_path = get_new_image_name(
            image_path, func_name="replace-something"
        )
        updated_image = updated_image.resize(original_size)
        updated_image.save(updated_image_path)

        logger.debug(
            f"\nProcessed ImageEditing, Input Image: {image_path}, Replace"
            f" {to_be_replaced_txt} to {replace_with_txt}, Output Image:"
            f" {updated_image_path}"
        )

        return updated_image_path

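# InstructPix2Pix applies the timbrooks/instruct-pix2pix pipeline so a text instruction
# (e.g. "make it look like a painting") restyles the input image.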
class InstructPix2Pix:
    def __init__(self, device):
        print("Initializing InstructPix2Pix to %s" % device)
        self.device = device
        self.torch_dtype = torch.float16 if "cuda" in device else torch.float32
        self.pipe = StableDiffusionInstructPix2PixPipeline.from_pretrained(
            "timbrooks/instruct-pix2pix",
            safety_checker=None,
            torch_dtype=self.torch_dtype,
        ).to(device)
        self.pipe.scheduler = EulerAncestralDiscreteScheduler.from_config(
            self.pipe.scheduler.config
        )

    @tool(
        name="Instruct Image Using Text",
        description=(
            "useful when you want the style of the image to be like the text. "
            "like: make it look like a painting. or make it like a robot. "
            "The input to this tool should be a comma separated string of two, "
            "representing the image_path and the text. "
        ),
    )
    def inference(self, inputs):
        """Change style of image."""
        logger.debug("===> Starting InstructPix2Pix Inference")
        image_path, text = inputs.split(",")[0], ",".join(inputs.split(",")[1:])
        original_image = Image.open(image_path)
        image = self.pipe(
            text, image=original_image, num_inference_steps=40, image_guidance_scale=1.2
        ).images[0]
        updated_image_path = get_new_image_name(image_path, func_name="pix2pix")
        image.save(updated_image_path)

        logger.debug(
            f"\nProcessed InstructPix2Pix, Input Image: {image_path}, Instruct Text:"
            f" {text}, Output Image: {updated_image_path}"
        )

        return updated_image_path

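# Text2Image generates an image from a prompt with Stable Diffusion v1.5, appending a fixed
# quality-boosting positive prompt and a standard negative prompt, and saves it under ./image/.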
class Text2Image:
    def __init__(self, device):
        print("Initializing Text2Image to %s" % device)
        self.device = device
        self.torch_dtype = torch.float16 if "cuda" in device else torch.float32
        self.pipe = StableDiffusionPipeline.from_pretrained(
            "runwayml/stable-diffusion-v1-5", torch_dtype=self.torch_dtype
        )
        self.pipe.to(device)
        self.a_prompt = "best quality, extremely detailed"
        self.n_prompt = (
            "longbody, lowres, bad anatomy, bad hands, missing fingers, extra digit, "
            "fewer digits, cropped, worst quality, low quality"
        )

    @tool(
        name="Generate Image From User Input Text",
        description=(
            "useful when you want to generate an image from a user input text and save"
            " it to a file. like: generate an image of an object or something, or"
            " generate an image that includes some objects. The input to this tool"
            " should be a string, representing the text used to generate image. "
        ),
    )
    def inference(self, text):
        image_filename = os.path.join("image", str(uuid.uuid4())[0:8] + ".png")
        prompt = text + ", " + self.a_prompt
        image = self.pipe(prompt, negative_prompt=self.n_prompt).images[0]
        image.save(image_filename)

        logger.debug(
            f"\nProcessed Text2Image, Input Text: {text}, Output Image:"
            f" {image_filename}"
        )

        return image_filename

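# VisualQuestionAnswering loads the BLIP VQA model once at construction and answers
# "image_path,question" queries.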
class VisualQuestionAnswering:
    def __init__(self, device):
        print("Initializing VisualQuestionAnswering to %s" % device)
        self.torch_dtype = torch.float16 if "cuda" in device else torch.float32
        self.device = device
        self.processor = BlipProcessor.from_pretrained("Salesforce/blip-vqa-base")
        self.model = BlipForQuestionAnswering.from_pretrained(
            "Salesforce/blip-vqa-base", torch_dtype=self.torch_dtype
        ).to(self.device)

    @tool(
        name="Answer Question About The Image",
        description=(
            "useful when you need an answer for a question based on an image. like:"
            " what is the background color of the last image, how many cats in this"
            " figure, what is in this figure. The input to this tool should be a comma"
            " separated string of two, representing the image_path and the question"
        ),
    )
    def inference(self, inputs):
        image_path, question = inputs.split(",")
        raw_image = Image.open(image_path).convert("RGB")
        inputs = self.processor(raw_image, question, return_tensors="pt").to(
            self.device, self.torch_dtype
        )
        out = self.model.generate(**inputs)
        answer = self.processor.decode(out[0], skip_special_tokens=True)

        logger.debug(
            f"\nProcessed VisualQuestionAnswering, Input Image: {image_path}, Input"
            f" Question: {question}, Output Answer: {answer}"
        )

        return answer

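# ImageCaptioning rescales the incoming file so its longer side is 512 px, captions it with
# BLIP, and returns the caption embedded in IMAGE_PROMPT for the agent to consume.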
class ImageCaptioning(BaseHandler):
    def __init__(self, device):
        print("Initializing ImageCaptioning to %s" % device)
        self.device = device
        self.torch_dtype = torch.float16 if "cuda" in device else torch.float32
        self.processor = BlipProcessor.from_pretrained(
            "Salesforce/blip-image-captioning-base"
        )
        self.model = BlipForConditionalGeneration.from_pretrained(
            "Salesforce/blip-image-captioning-base", torch_dtype=self.torch_dtype
        ).to(self.device)

    def handle(self, filename: str):
        img = Image.open(filename)
        width, height = img.size
        ratio = min(512 / width, 512 / height)
        width_new, height_new = (round(width * ratio), round(height * ratio))
        img = img.resize((width_new, height_new))
        img = img.convert("RGB")
        img.save(filename, "PNG")
        print(f"Resize image from {width}x{height} to {width_new}x{height_new}")

        inputs = self.processor(Image.open(filename), return_tensors="pt").to(
            self.device, self.torch_dtype
        )
        out = self.model.generate(**inputs)
        description = self.processor.decode(out[0], skip_special_tokens=True)
        print(
            f"\nProcessed ImageCaptioning, Input Image: {filename}, Output Text:"
            f" {description}"
        )

        return IMAGE_PROMPT.format(filename=filename, description=description)