feat: Setup vllm class

2 years ago · c63422f78b
parent 7731f7db7d
commit c63422f78b
12 changed files with 142 additions and 52 deletions
--- a/.gitignore
+++ b/.gitignore
@ -75,6 +75,7 @@ coverage.xml
 .hypothesis/
 .pytest_cache/
 cover/
 cookies.json
 # Translations
 *.mo
--- a/bingchat.py
+++ b/bingchat.py
@ -0,0 +1,7 @@
 from swarms.models.bing_chat import BingChat
 # Initialize the Bing Chat model from a local cookies file
 bing = BingChat(cookies_path="./cookies.json")
 task = "generate topics for PositiveMed.com,:  1. Monitor Health Trends: Scan Google Alerts, authoritative health websites, and social media for emerging health, wellness, and medical discussions. 2. Keyword Research: Utilize tools like SEMrush to identify keywords with moderate to high search volume and low competition. Focus on long-tail, conversational keywords. 3. Analyze Site Data: Review PositiveMed's analytics to pinpoint popular articles and areas lacking recent content. 4. Crowdsourcing: Gather topic suggestions from the brand's audience and internal team, ensuring alignment with PositiveMed's mission. 5. Topic Evaluation: Assess topics for audience relevance, uniqueness, brand fit, current relevance, and SEO potential. 6. Tone and Style: Ensure topics can be approached with an educational, empowering, and ethical tone, in line with the brand's voice.  Use this framework to generate a list of potential topics that cater to PositiveMed's audience while staying true to its brand ethos.  Find trending topics for slowing and reversing aging think step by step and o into as much detail as possible"
 response = bing(task)
 print(response)
--- a/example.py
+++ b/example.py
@ -1,22 +1,29 @@
 from tabnanny import verbose
 from click import prompt
 from langchain import LLMChain
 from swarms.models import OpenAIChat
 from swarms import Worker
 from swarms.prompts import PRODUCT_AGENT_PROMPT
 from swarms.models.bing_chat import BingChat
-api_key = ""
+# api_key = ""
-llm = OpenAIChat(
+# llm = OpenAIChat(
-    openai_api_key=api_key,
+#     openai_api_key=api_key,
-    temperature=0.5,
+#     temperature=0.5,
-)
+# )
 llm = BingChat(cookies_path="./cookies.json")
 # llm = LLMChain(llm=bing.to_dict(), prompt=prompt, verbose=verbose)
 # Build the worker around the Bing Chat LLM created above.
 node = Worker(
     llm=llm,
     ai_name="Optimus Prime",
     # The `api_key` assignment earlier in this script is commented out, so
     # referencing that name here would raise NameError. No OpenAI key is
     # needed because `use_openai` is False below.
     openai_api_key=None,
     ai_role=PRODUCT_AGENT_PROMPT,
     external_tools=None,
     human_in_the_loop=False,
     temperature=0.5,
     use_openai=False,
 )
 task = "Create an entirely new board game around riddles for physics"
--- a/playground/agents/bingchat.py
+++ b/playground/agents/bingchat.py
@ -4,16 +4,12 @@ from swarms.tools.autogpt import EdgeGPTTool, tool
 from swarms.models import OpenAIChat
 import os
-api_key = os.getenv("OPENAI_API_KEY")
+load_dotenv("../.env")
 auth_cookie = os.environ.get("AUTH_COOKIE")
 auth_cookie_SRCHHPGUSR = os.environ.get("AUTH_COOKIE_SRCHHPGUSR")
 # Initialize the Bing Chat model
-edgegpt = BingChat()
+bing = BingChat(cookies_path="./cookies.json", auth_cookie_SRCHHPGUSR)
 task = "generate topics for PositiveMed.com,:  1. Monitor Health Trends: Scan Google Alerts, authoritative health websites, and social media for emerging health, wellness, and medical discussions. 2. Keyword Research: Utilize tools like SEMrush to identify keywords with moderate to high search volume and low competition. Focus on long-tail, conversational keywords. 3. Analyze Site Data: Review PositiveMed's analytics to pinpoint popular articles and areas lacking recent content. 4. Crowdsourcing: Gather topic suggestions from the brand's audience and internal team, ensuring alignment with PositiveMed's mission. 5. Topic Evaluation: Assess topics for audience relevance, uniqueness, brand fit, current relevance, and SEO potential. 6. Tone and Style: Ensure topics can be approached with an educational, empowering, and ethical tone, in line with the brand's voice.  Use this framework to generate a list of potential topics that cater to PositiveMed's audience while staying true to its brand ethos.  Find trending topics for slowing and reversing aging think step by step and o into as much detail as possible"
-
+bing(task)
 # Initialize the Worker with the Bing Chat instance as its external tool.
 # The instance is named `bing` in this script; the old `edgegpt` name no
 # longer exists and would raise NameError.
 # NOTE(review): `llm` is not defined in the visible part of this script --
 # confirm it is created earlier in the file.
 worker = Worker(llm=llm, ai_name="EdgeGPT Worker", external_tools=[bing])
 # Use the worker to process a task
 task = "Hello, my name is ChatGPT"
 response = worker.run(task)
 print(response)
--- a/revgpt.py
+++ b/revgpt.py
@ -0,0 +1,29 @@
 import os
 import sys
 from dotenv import load_dotenv
 from swarms.models.revgptV4 import RevChatGPTModelv4
 from swarms.models.revgptV1 import RevChatGPTModelv1
 # Example script: build a RevChatGPT model from environment variables and run
 # a single task through it.
 #
 # root_dir is the parent of the directory that contains this file (two
 # dirname() calls on the file's resolved path).
 # NOTE(review): the path is appended after the `swarms` imports above have
 # already run, so it cannot influence how those imports are resolved --
 # confirm whether this should happen before the imports.
 root_dir = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))
 sys.path.append(root_dir)
 # Load variables from a .env file (python-dotenv) before they are read below.
 load_dotenv()
 # Keyword arguments forwarded to the model constructor. os.getenv returns None
 # for an unset variable, so each value may be None.
 config = {
    "model": os.getenv("REVGPT_MODEL"),
    # Single-element list wrapping the raw env value; becomes [None] when unset.
    "plugin_ids": [os.getenv("REVGPT_PLUGIN_IDS")],
    # True only when the variable is exactly the string "True".
    "disable_history": os.getenv("REVGPT_DISABLE_HISTORY") == "True",
    "PUID": os.getenv("REVGPT_PUID"),
    # Single-element list wrapping the raw env value; becomes [None] when unset.
    "unverified_plugin_domains": [os.getenv("REVGPT_UNVERIFIED_PLUGIN_DOMAINS")],
 }
 # For v1 model
 model = RevChatGPTModelv1(access_token=os.getenv("ACCESS_TOKEN"), **config)
 # model = RevChatGPTModelv4(access_token=os.getenv("ACCESS_TOKEN"), **config)
 # For v3 model
 # model = RevChatGPTModel(access_token=os.getenv("OPENAI_API_KEY"), **config)
 # Run one prompt through the selected model and print whatever it returns.
 task = "Write a cli snake game"
 response = model.run(task)
 print(response)
--- a/swarms/models/init.py
+++ b/swarms/models/init.py
@ -18,8 +18,8 @@ from swarms.models.layoutlm_document_qa import LayoutLMDocumentQA
 # from swarms.models.fuyu import Fuyu # Not working, wait until they update
 import sys
-log_file = open("stderr_log.txt", "w")
+# log_file = open("stderr_log.txt", "w")
-sys.stderr = log_file
+# sys.stderr = log_file
 __all__ = [
--- a/swarms/models/bing_chat.py
+++ b/swarms/models/bing_chat.py
@ -1,4 +1,5 @@
 """Bing-Chat model by Microsoft"""
 import os
 import asyncio
 import json
 from pathlib import Path
@ -25,19 +26,9 @@ class BingChat:
    """
-
+    def __init__(self, cookies_path: str = None):
-    def __init__(self, cookies_path: str = None, auth_cookie: str = None, auth_cookie_SRCHHPGUSR: str = None):
+        self.cookies = json.loads(open(cookies_path, encoding="utf-8").read())
        auth_cookie = os.environ("AUTH_COOKIE")
        auth_cookie_SRCHHPGUSR = os.enviro("AUTH_COOKIE_SRCHHPGUSR")
        if cookies_path:
            self.cookies = json.loads(open(cookies_path, encoding="utf-8").read())
        elif auth_cookie:
            self.cookies = auth_cookie
        else:
            raise ValueError("Either cookies_path or auth_cookie must be provided.")
        self.bot = asyncio.run(Chatbot.create(cookies=self.cookies))
        self.auth_cookie = auth_cookie
        self.auth_cookie_SRCHHPGUSR = auth_cookie_SRCHHPGUSR
    def __call__(
        self, prompt: str, style: ConversationStyle = ConversationStyle.creative
--- a/swarms/models/bioclip.py
+++ b/swarms/models/bioclip.py
@ -75,6 +75,7 @@ class BioClip:
            'adenocarcinoma histopathology',
            'brain MRI',
            'covid line chart',
            'covid line chart',
            'squamous cell carcinoma histopathology',
            'immunohistochemistry histopathology',
            'bone X-ray',
--- a/swarms/models/revgptV1.py
+++ b/swarms/models/revgptV1.py
@ -35,13 +35,13 @@ from httpx import AsyncClient
 from OpenAIAuth import Auth0 as Authenticator
 from rich.live import Live
 from rich.markdown import Markdown
-import schemas.typings as t
+import swarms.schemas.typings as t
 from swarms.utils.revutils import create_completer
 from swarms.utils.revutils import create_session
 from swarms.utils.revutils import get_input
 # BASE_URL = environ.get("CHATGPT_BASE_URL", "http://192.168.250.249:9898/api/")
-# BASE_URL = os.environ.get("CHATGPT_BASE_URL", "https://ai.fakeopen.com/api/")
+BASE_URL = os.environ.get("CHATGPT_BASE_URL", "https://ai.fakeopen.com/api/")
 # BASE_URL = environ.get("CHATGPT_BASE_URL", "https://bypass.churchless.tech/")
 bcolors = t.Colors()
--- a/swarms/models/revgptV4.py
+++ b/swarms/models/revgptV4.py
@ -40,14 +40,14 @@ from rich.markdown import Markdown
 import argparse
 import re
-import schemas.typings as t
+import swarms.schemas.typings as t
 from prompt_toolkit import prompt
 from prompt_toolkit import PromptSession
 from prompt_toolkit.auto_suggest import AutoSuggestFromHistory
 from prompt_toolkit.completion import WordCompleter
 from prompt_toolkit.history import InMemoryHistory
 from prompt_toolkit.key_binding import KeyBindings
-from schemas.typings import Colors
+from swarms.schemas.typings import Colors
 bindings = KeyBindings()
--- a/swarms/models/vllm.py
+++ b/swarms/models/vllm.py
@ -0,0 +1,55 @@
 from vllm import LLM, SamplingParams
 import openai
 import ray
 import uvicorn
 from vllm.entrypoints import api_server as vllm_api_server
 from vllm.entrypoints.openai import api_server as openai_api_server
 from skypilot import SkyPilot
 class VLLMModel:
     """Convenience wrapper around a vLLM ``LLM`` engine.

     The constructor loads the model immediately. The remaining methods cover
     in-process generation, starting the vLLM HTTP entrypoints with uvicorn,
     querying an OpenAI-compatible endpoint, and a SkyPilot launch hook.

     Args:
         model_name: Model identifier handed to ``LLM``.
         tensor_parallel_size: Forwarded unchanged to ``LLM``.
     """

     def __init__(self, model_name="facebook/opt-125m", tensor_parallel_size=1):
         self.model_name = model_name
         self.tensor_parallel_size = tensor_parallel_size
         self.model = LLM(model_name, tensor_parallel_size=tensor_parallel_size)
         self.temperature = 1.0
         # None means "not configured": SamplingParams then uses its own default.
         self.max_tokens = None
         self.sampling_params = SamplingParams(temperature=self.temperature)

     def generate_text(self, prompt: str) -> str:
         """Generate a completion for one prompt and return its text."""
         results = self.model.generate([prompt], self.sampling_params)
         return results[0].outputs[0].text

     def set_temperature(self, value: float):
         """Set the sampling temperature and rebuild the sampling parameters.

         A ``max_tokens`` limit configured earlier through ``set_max_tokens`` is
         carried over, so changing the temperature does not drop it.
         """
         self.temperature = value
         params = {"temperature": self.temperature}
         if self.max_tokens is not None:
             params["max_tokens"] = self.max_tokens
         self.sampling_params = SamplingParams(**params)

     def set_max_tokens(self, value: int):
         """Set the generation length limit and rebuild the sampling parameters."""
         self.max_tokens = value
         self.sampling_params = SamplingParams(
             temperature=self.temperature, max_tokens=self.max_tokens
         )

     def offline_batched_inference(self, prompts: list) -> list:
         """Generate for several prompts in one call.

         Returns:
             The first completion text of each result, in the order ``generate``
             returned them.
         """
         results = self.model.generate(prompts, self.sampling_params)
         return [result.outputs[0].text for result in results]

     def start_api_server(self, host="0.0.0.0", port=8000):
         """Serve ``vllm.entrypoints.api_server.app`` with uvicorn.

         ``uvicorn.run`` does not return until the server stops.
         """
         uvicorn.run(vllm_api_server.app, host=host, port=port)

     def start_openai_compatible_server(self, host="0.0.0.0", port=8000):
         """Serve ``vllm.entrypoints.openai.api_server.app`` with uvicorn.

         ``uvicorn.run`` does not return until the server stops.
         """
         uvicorn.run(openai_api_server.app, host=host, port=port)

     def query_openai_compatible_server(
         self, prompt: str, api_base="http://localhost:8000/v1"
     ):
         """Send a completion request for ``prompt`` to an OpenAI-style endpoint.

         Args:
             prompt: Text to complete.
             api_base: Base URL of the server to query.

         Returns:
             The object returned by ``openai.Completion.create``.
         """
         # These are attributes of the `openai` module itself, so the values
         # stay in effect for later callers in the same process.
         openai.api_key = "EMPTY"
         openai.api_base = api_base
         return openai.Completion.create(model=self.model_name, prompt=prompt)

     def distributed_inference(self, prompt: str):
         """Rebuild the engine under Ray, generate for ``prompt``, return the text.

         Ray is started and shut down here only when it is not already running,
         so a Ray runtime owned by the caller is left untouched and a second
         ``ray.init()`` is never attempted. Shutdown runs even if generation
         raises.
         """
         started_ray = not ray.is_initialized()
         if started_ray:
             ray.init()
         try:
             # NOTE(review): this replaces self.model; whether the new engine
             # stays usable after ray.shutdown() has not been verified.
             self.model = LLM(
                 self.model_name, tensor_parallel_size=self.tensor_parallel_size
             )
             results = self.model.generate([prompt], self.sampling_params)
             return results[0].outputs[0].text
         finally:
             if started_ray:
                 ray.shutdown()

     def run_on_cloud_with_skypilot(self, yaml_file):
         """Create a ``SkyPilot`` object and call its ``launch`` with ``yaml_file``.

         Whatever ``launch`` returns is discarded.
         """
         sky = SkyPilot()
         sky.launch(yaml_file)
--- a/swarms/workers/worker.py
+++ b/swarms/workers/worker.py
@ -68,11 +68,13 @@ class Worker:
        temperature: float = 0.5,
        llm=None,
        openai_api_key: str = None,
        use_openai: bool = True,
    ):
        self.temperature = temperature
        self.human_in_the_loop = human_in_the_loop
        self.llm = llm
        self.openai_api_key = openai_api_key
        self.use_openai = use_openai
        self.ai_name = ai_name
        self.ai_role = ai_role
        self.coordinates = (
@ -149,24 +151,25 @@ class Worker:
            self.tools.extend(external_tools)
    def setup_memory(self):
-        """
+            """
-        Set up memory for the worker.
+            Set up memory for the worker.
-        """
+            """
-        openai_api_key = os.getenv("OPENAI_API_KEY") or self.openai_api_key
+            if self.use_openai:  # Only use OpenAI if use_openai is True
-        try:
+                openai_api_key = os.getenv("OPENAI_API_KEY") or self.openai_api_key
-            embeddings_model = OpenAIEmbeddings(openai_api_key=openai_api_key)
+                try:
-            embedding_size = 1536
+                    embeddings_model = OpenAIEmbeddings(openai_api_key=openai_api_key)
-            index = faiss.IndexFlatL2(embedding_size)
+                    embedding_size = 1536
-
+                    index = faiss.IndexFlatL2(embedding_size)
-            self.vectorstore = FAISS(
+
-                embeddings_model.embed_query, index, InMemoryDocstore({}), {}
+                    self.vectorstore = FAISS(
-            )
+                        embeddings_model.embed_query, index, InMemoryDocstore({}), {}
-
+                    )
-        except Exception as error:
+
-            raise RuntimeError(
+                except Exception as error:
-                f"Error setting up memory perhaps try try tuning the embedding size: {error}"
+                    raise RuntimeError(
-            )
+                        f"Error setting up memory perhaps try try tuning the embedding size: {error}"
-
+                    )
    def setup_agent(self):
        """
        Set up the autonomous agent.