[DOCS][LUMO]

pull/763/head
Kye Gomez 3 months ago
parent 12109e3857
commit f1600d2e43

@ -210,10 +210,12 @@ nav:
    - OpenRouter: "swarms/examples/openrouter.md"
    - XAI: "swarms/examples/xai.md"
    - Swarms Tools:
      - Agent with HTX + CoinGecko: "swarms/examples/swarms_tools_htx.md"
      - Agent with HTX + CoinGecko Function Calling: "swarms/examples/swarms_tools_htx_gecko.md"
      - Agent with Yahoo Finance: "swarms/examples/yahoo_finance.md"
      - Twitter Agents: "swarms_tools/twitter.md"
    - Blockchain Agents:
      - Agent with HTX + CoinGecko: "swarms/examples/swarms_tools_htx.md"
      - Agent with HTX + CoinGecko Function Calling: "swarms/examples/swarms_tools_htx_gecko.md"
      - Lumo: "swarms/examples/lumo.md"
    - Meme Agents:
      - Bob The Builder: "swarms/examples/bob_the_builder.md"
      - Meme Agent Builder: "swarms/examples/meme_agents.md"

@ -0,0 +1,63 @@
# Lumo Example
Lumo-70B-Instruct is a large language model built for the Solana ecosystem. It is based on Meta's Llama 3.3 70B Instruct foundation and fine-tuned on an extensive corpus of Solana documentation, making it one of the largest models dedicated to blockchain developer assistance.

- [Docs](https://huggingface.co/lumolabs-ai/Lumo-70B-Instruct)

The example below wraps the model in a minimal 4-bit quantized loader and hands it to a swarms `Agent`:
```python
from swarms import Agent
from transformers import LlamaForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch


class Lumo:
    """
    A class for generating text using the Lumo model with 4-bit quantization.
    """

    def __init__(self):
        """
        Initializes the Lumo model with 4-bit quantization and a tokenizer.
        """
        # Configure 4-bit quantization so the 70B model fits in far less GPU memory
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.float16,
            llm_int8_enable_fp32_cpu_offload=True,
        )

        self.model = LlamaForCausalLM.from_pretrained(
            "lumolabs-ai/Lumo-70B-Instruct",
            device_map="auto",
            quantization_config=bnb_config,
            use_cache=False,
            attn_implementation="sdpa",
        )
        self.tokenizer = AutoTokenizer.from_pretrained(
            "lumolabs-ai/Lumo-70B-Instruct"
        )

    def run(self, task: str) -> str:
        """
        Generates text for the given task using the Lumo model.

        Args:
            task (str): The input prompt for the model.

        Returns:
            str: The generated text.
        """
        inputs = self.tokenizer(task, return_tensors="pt").to(self.model.device)
        outputs = self.model.generate(**inputs, max_new_tokens=100)
        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)


Agent(
    agent_name="Solana-Analysis-Agent",
    llm=Lumo(),  # pass the custom model instance via llm; model_name expects a string
    max_loops="auto",
    interactive=True,
    streaming_on=True,
).run("How do I create a smart contract in Solana?")
```
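
The wrapper can also be exercised on its own, outside the `Agent` loop. A minimal sketch, assuming the quantized weights fit in the available GPU memory:

```python
# Hypothetical standalone usage of the Lumo wrapper defined above
lumo = Lumo()

# One-shot generation without the interactive Agent loop
print(lumo.run("Explain Solana's account model in one paragraph."))
```

Exposing the model through a `run(task: str) -> str` method is what lets the swarms `Agent` drive it like any other custom LLM backend.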

@ -1,6 +1,3 @@
import os
from swarm_models import OpenAIChat
from swarms import Agent
from swarms.prompts.finance_agent_sys_prompt import (
    FINANCIAL_AGENT_SYS_PROMPT,
@ -9,16 +6,6 @@ from dotenv import load_dotenv
load_dotenv()

# Get the Groq API key from the environment variable
api_key = os.getenv("GROQ_API_KEY")

# Model
model = OpenAIChat(
    openai_api_base="https://api.groq.com/openai/v1",
    openai_api_key=api_key,
    model_name="llama-3.1-70b-versatile",
    temperature=0.1,
)

# Initialize the agent
agent = Agent(
@ -26,7 +13,7 @@ agent = Agent(
    agent_description="Personal finance advisor agent",
    system_prompt=FINANCIAL_AGENT_SYS_PROMPT,
    max_loops=1,
    llm=model,
    model_name="gpt-4o",
    dynamic_temperature_enabled=True,
    user_name="swarms_corp",
    retry_attempts=3,

@ -0,0 +1,59 @@
import torch
from transformers import (
    AutoTokenizer,
    BitsAndBytesConfig,
    LlamaForCausalLM,
)

from swarms import Agent


class Lumo:
    """
    A class for generating text using the Lumo model with 4-bit quantization.
    """

    def __init__(self):
        """
        Initializes the Lumo model with 4-bit quantization and a tokenizer.
        """
        # Configure 4-bit quantization
        bnb_config = BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.float16,
            llm_int8_enable_fp32_cpu_offload=True,
        )

        self.model = LlamaForCausalLM.from_pretrained(
            "lumolabs-ai/Lumo-70B-Instruct",
            device_map="auto",
            quantization_config=bnb_config,
            use_cache=False,
            attn_implementation="sdpa",
        )
        self.tokenizer = AutoTokenizer.from_pretrained(
            "lumolabs-ai/Lumo-70B-Instruct"
        )

    def run(self, task: str) -> str:
        """
        Generates text for the given task using the Lumo model.

        Args:
            task (str): The input prompt for the model.

        Returns:
            str: The generated text.
        """
        inputs = self.tokenizer(task, return_tensors="pt").to(self.model.device)
        outputs = self.model.generate(**inputs, max_new_tokens=100)
        return self.tokenizer.decode(outputs[0], skip_special_tokens=True)


Agent(
    agent_name="Solana-Analysis-Agent",
    llm=Lumo(),  # pass the custom model instance via llm; model_name expects a string
    max_loops="auto",
    interactive=True,
    streaming_on=True,
).run("How do I create a smart contract in Solana?")

@ -5,7 +5,7 @@ build-backend = "poetry.core.masonry.api"
[tool.poetry]
name = "swarms"
version = "6.9.7"
version = "6.9.8"
description = "Swarms - TGSC"
license = "MIT"
authors = ["Kye Gomez <kye@apac.ai>"]
@ -62,7 +62,6 @@ python = ">=3.10,<4.0"
asyncio = ">=3.4.3,<4.0"
toml = "*"
pypdf = "5.1.0"
swarm-models = "*"
loguru = "*"
pydantic = "*"
tenacity = "*"
@ -76,7 +75,6 @@ aiofiles = "*"
clusterops = "*"
# chromadb = "*"
rich = "*"
pandas = "*"
# sentence-transformers = "*"

@ -23,7 +23,6 @@ import toml
import yaml
from loguru import logger
from pydantic import BaseModel
from swarm_models.tiktoken_wrapper import TikTokenizer
from swarms.agents.ape_agent import auto_generate_prompt
from swarms.artifacts.main_artifact import Artifact
@ -55,6 +54,7 @@ from swarms.utils.wrapper_clusterop import (
)
from swarms.telemetry.capture_sys_data import log_agent_data
from swarms.agents.agent_print import agent_print
from swarms.utils.litellm_tokenizer import count_tokens
# Utils
@ -439,7 +439,7 @@ class Agent:
        self.time_created = time_created
        self.data_memory = data_memory
        self.load_yaml_path = load_yaml_path
        self.tokenizer = TikTokenizer()
        self.tokenizer = tokenizer
        self.auto_generate_prompt = auto_generate_prompt
        self.rag_every_loop = rag_every_loop
        self.plan_enabled = plan_enabled
@ -563,9 +563,7 @@ class Agent:
            max_loops=self.max_loops,
            steps=self.short_memory.to_dict(),
            full_history=self.short_memory.get_str(),
            total_tokens=self.tokenizer.count_tokens(
                self.short_memory.get_str()
            ),
            total_tokens=count_tokens(self.short_memory.get_str()),
            stopping_token=self.stopping_token,
            interactive=self.interactive,
            dynamic_temperature_enabled=self.dynamic_temperature_enabled,
@ -1043,10 +1041,8 @@ class Agent:
            self.agent_output.full_history = (
                self.short_memory.get_str()
            )
            self.agent_output.total_tokens = (
                self.tokenizer.count_tokens(
                    self.short_memory.get_str()
                )
            self.agent_output.total_tokens = count_tokens(
                self.short_memory.get_str()
            )

            # Handle artifacts
@ -1976,7 +1972,7 @@ class Agent:
        )
        # # Count the tokens
        # memory_token_count = self.tokenizer.count_tokens(
        # memory_token_count = count_tokens(
        #     memory_retrieval
        # )
        # if memory_token_count > self.memory_chunk_size:
@ -2065,7 +2061,7 @@ class Agent:
    def check_available_tokens(self):
        # Log the amount of tokens left in the memory and in the task
        if self.tokenizer is not None:
            tokens_used = self.tokenizer.count_tokens(
            tokens_used = count_tokens(
                self.short_memory.return_history_as_string()
            )
            logger.info(
@ -2076,7 +2072,7 @@ class Agent:
    def tokens_checks(self):
        # Check the tokens available
        tokens_used = self.tokenizer.count_tokens(
        tokens_used = count_tokens(
            self.short_memory.return_history_as_string()
        )
        out = self.check_available_tokens()
@ -2140,13 +2136,10 @@ class Agent:
        # Calculate token usage
        # full_memory = self.short_memory.return_history_as_string()
        # prompt_tokens = self.tokenizer.count_tokens(full_memory)
        # completion_tokens = self.tokenizer.count_tokens(response)
        # prompt_tokens = count_tokens(full_memory)
        # completion_tokens = count_tokens(response)
        # total_tokens = prompt_tokens + completion_tokens
        total_tokens = (
            self.tokenizer.count_tokens(task)
            + self.tokenizer.count_tokens(response),
        )
        total_tokens = (count_tokens(task) + count_tokens(response),)

        # # Get memory responses
        # memory_responses = {

@ -7,7 +7,7 @@ from typing import Any, Dict, List, Optional
import yaml
from pydantic import BaseModel
from swarm_models.tiktoken_wrapper import TikTokenizer
from swarms.utils.litellm_tokenizer import count_tokens
logger = logging.getLogger(__name__)
@ -60,7 +60,7 @@ class MemoryManager:
        long_term_memory: Optional[Any] = None,
    ):
        self.config = config
        self.tokenizer = tokenizer or TikTokenizer()
        self.tokenizer = tokenizer
        self.long_term_memory = long_term_memory

        # Initialize memories
@ -86,7 +86,7 @@ class MemoryManager:
            agent_name=agent_name,
            session_id=session_id,
            memory_type=memory_type,
            token_count=self.tokenizer.count_tokens(content),
            token_count=count_tokens(content),
        )
        return MemoryEntry(content=content, metadata=metadata)
@ -219,7 +219,7 @@ class MemoryManager:
        self, text: str, max_tokens: int
    ) -> str:
        """Truncate text to fit within token limit"""
        current_tokens = self.tokenizer.count_tokens(text)
        current_tokens = count_tokens(text)
        if current_tokens <= max_tokens:
            return text
@ -230,7 +230,7 @@ class MemoryManager:
        current_count = 0

        for sentence in sentences:
            sentence_tokens = self.tokenizer.count_tokens(sentence)
            sentence_tokens = count_tokens(sentence)
            if current_count + sentence_tokens <= max_tokens:
                result.append(sentence)
                current_count += sentence_tokens
@ -376,9 +376,7 @@ class MemoryManager:
agent_name="system",
session_id="long_term",
memory_type="long_term",
token_count=self.tokenizer.count_tokens(
content
),
token_count=count_tokens(content),
)
results.append(
MemoryEntry(

@ -2,9 +2,11 @@ import os
from typing import List
from pydantic import BaseModel, Field
from swarm_models import OpenAIFunctionCaller, OpenAIChat
from swarms.structs.agent import Agent
from swarms.structs.meme_agent_persona_generator import (
    OpenAIFunctionCaller,
)
from swarms.structs.swarm_router import SwarmRouter
from swarms.utils.loguru_logger import initialize_logger
from swarms.structs.agents_available import showcase_available_agents
@ -64,15 +66,6 @@ class SwarmConfig(BaseModel):
    )

# Get the OpenAI API key from the environment variable
api_key = os.getenv("OPENAI_API_KEY")

# Create an instance of the OpenAIChat class
model = OpenAIChat(
    openai_api_key=api_key, model_name="gpt-4o-mini", temperature=0.1
)
BOSS_SYSTEM_PROMPT = """
Manage a swarm of worker agents to efficiently serve the user by deciding whether to create new agents or delegate tasks. Ensure operations are efficient and effective.
@ -248,7 +241,7 @@ class AutoSwarmBuilder:
            agent_name=agent_name,
            description=agent_description,
            system_prompt=agent_system_prompt,
            llm=model,
            model_name="gpt-4o",
            max_loops=max_loops,
            autosave=True,
            dashboard=False,

@ -20,11 +20,6 @@ from swarms.structs.agent import Agent
from swarms.structs.conversation import Conversation
from swarms.structs.omni_agent_types import AgentType
from pydantic import BaseModel
from swarms.utils.pandas_utils import (
    dict_to_dataframe,
    display_agents_info,
    pydantic_model_to_dataframe,
)
from swarms.utils.loguru_logger import initialize_logger
logger = initialize_logger(log_folder="base_swarm")
@ -795,17 +790,10 @@ class BaseSwarm(ABC):
        Returns:
            None
        """
        display_agents_info(self.agents)
        ...

    def agents_to_dataframe(self):
        """
        Convert agents to a pandas DataFrame.
        """
        data = [agent.agent_output.dict() for agent in self.agents]
        return dict_to_dataframe(data)

    def model_to_dataframe(self):
        """
        Convert the Pydantic model to a pandas DataFrame.
        """
        return pydantic_model_to_dataframe(self.output_schema)
        ...

@ -6,7 +6,6 @@ from swarms.structs.base_swarm import BaseSwarm
from swarms.structs.agent import Agent
from swarms.structs.concat import concat_strings
from swarms.structs.agent_registry import AgentRegistry
from swarm_models.base_llm import BaseLLM
from swarms.structs.conversation import Conversation
logger = initialize_logger(log_folder="hiearchical_swarm")
@ -156,7 +155,7 @@ class HierarchicalAgentSwarm(BaseSwarm):
        create_agents_on: bool = False,
        template_worker_agent: Agent = None,
        director_planning_prompt: str = None,
        template_base_worker_llm: BaseLLM = None,
        template_base_worker_llm: Any = None,
        swarm_history: str = None,
        *args,
        **kwargs,

@ -4,12 +4,10 @@ from typing import (
    Sequence,
    Union,
)
from swarm_models.base_llm import BaseLLM
from swarm_models.base_multimodal_model import BaseMultiModalModel
from swarms.structs.agent import Agent
# Unified type for agent
AgentType = Union[Agent, Callable, Any, BaseLLM, BaseMultiModalModel]
AgentType = Union[Agent, Callable, Any]
# List of agents
AgentListType = Sequence[AgentType]

@ -5,7 +5,6 @@ from typing import List, Optional
from loguru import logger
from pydantic import BaseModel, Field
from pydantic.v1 import validator
from swarm_models import OpenAIChat
from tenacity import (
    retry,
    stop_after_attempt,
@ -295,18 +294,6 @@ class AutoSwarmBuilder:
            },
        )

        # Initialize OpenAI chat model
        try:
            self.chat_model = OpenAIChat(
                openai_api_key=self.api_key,
                model_name=self.model_name,
            )
        except Exception as e:
            logger.error(
                f"Failed to initialize OpenAI chat model: {str(e)}"
            )
            raise

    def run(
        self,
        task: str,
@ -444,7 +431,7 @@ class AutoSwarmBuilder:
            agent_name=agent_name,
            description=agent_description,
            system_prompt=agent_system_prompt,
            llm=self.chat_model,
            model_name="gpt-4o",
            verbose=self.verbose,
            dynamic_temperature_enabled=False,
            return_step_meta=False,

@ -8,7 +8,6 @@ from swarms.tools.logits_processor import (
    OutputNumbersTokens,
    StringStoppingCriteria,
)
from swarm_models.base_llm import BaseLLM
from swarms.utils.auto_download_check_packages import (
auto_check_and_download_package,
)
@ -59,7 +58,7 @@ class Jsonformer:
        max_number_tokens: int = 6,
        temperature: float = 1.0,
        max_string_token_length: int = 10,
        llm: BaseLLM = None,
        llm: Any = None,
    ):
        self.model = model
        self.tokenizer = tokenizer

@ -1,7 +1,96 @@
import os
import subprocess
from loguru import logger
from swarm_models.tiktoken_wrapper import TikTokenizer
try:
    import tiktoken
except ImportError:
    print("tiktoken not found, installing...")
    subprocess.run(["pip", "install", "tiktoken"])
    import tiktoken
import concurrent.futures
from typing import List
class TikTokenizer:
    def __init__(
        self,
        model_name: str = "o200k_base",
    ):
        """
        Initializes a TikTokenizer object.

        Args:
            model_name (str, optional): The name of the tiktoken encoding to use. Defaults to "o200k_base".
        """
        try:
            self.model_name = model_name
            self.encoding = tiktoken.get_encoding(model_name)
        except Exception as e:
            raise ValueError(
                f"Failed to initialize tokenizer with model '{model_name}': {str(e)}"
            )

    def encode(self, string: str) -> List[int]:
        """
        Tokenizes a text string.

        Args:
            string (str): The input text string.

        Returns:
            List[int]: The token ids for the text string.
        """
        return self.encoding.encode(string)

    def decode(self, tokens: List[int]) -> str:
        """
        Detokenizes a list of token ids.

        Args:
            tokens (List[int]): The input token ids.

        Returns:
            str: The detokenized text string.
        """
        return self.encoding.decode(tokens)

    def count_tokens(self, string: str) -> int:
        """
        Returns the number of tokens in a text string.

        Args:
            string (str): The input text string.

        Returns:
            int: The number of tokens in the text string.
        """
        num_tokens = 0

        def count_tokens_in_chunk(chunk):
            nonlocal num_tokens
            num_tokens += len(self.encoding.encode(chunk))

        # Split the string into chunks for parallel processing
        chunks = [
            string[i : i + 1000] for i in range(0, len(string), 1000)
        ]

        # Create a ThreadPoolExecutor with maximum threads
        with concurrent.futures.ThreadPoolExecutor(
            max_workers=10
        ) as executor:
            # Submit each chunk for processing
            futures = [
                executor.submit(count_tokens_in_chunk, chunk)
                for chunk in chunks
            ]

            # Wait for all futures to complete
            concurrent.futures.wait(futures)

        return num_tokens
class CodeExecutor:

@ -0,0 +1,6 @@
from litellm import encode


def count_tokens(text: str, model: str = "gpt-4o") -> int:
    """Count the number of tokens in the given text."""
    return len(encode(model=model, text=text))
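
A quick usage sketch for the new helper, assuming `litellm` is installed (`litellm.encode` maps the model name to the matching tokenizer):

# Count prompt tokens before dispatching a task to an agent
n_tokens = count_tokens("How do I create a smart contract in Solana?", model="gpt-4o")
print(n_tokens)  # token count under the gpt-4o tokenizer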

@ -66,7 +66,7 @@ class LiteLLM:
        return messages

    def run(self, task: str, tools: any = [], *args, **kwargs):
    def run(self, task: str, tools: list = [], *args, **kwargs):
        """
        Run the LLM model for the given task.
@ -88,7 +88,6 @@ class LiteLLM:
            stream=self.stream,
            temperature=self.temperature,
            max_tokens=self.max_tokens,
            tools=tools,
            *args,
            **kwargs,
        )
