# swarms/sentiment_news_analysis.py

# pip install swarms beautifulsoup4 requests python-dotenv
import re
from typing import Any, Dict
from urllib.parse import urlparse

import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from swarms import Agent

load_dotenv()

# Custom system prompt for financial sentiment analysis
FINANCIAL_SENTIMENT_SYSTEM_PROMPT = """
You are an expert financial analyst specializing in sentiment analysis of financial news and content. Your task is to:
1. Analyze financial content for bullish or bearish sentiment
2. Provide a numerical sentiment score between 0.0 (extremely bearish) and 1.0 (extremely bullish) where:
- 0.0-0.2: Extremely bearish (strong negative outlook)
- 0.2-0.4: Bearish (negative outlook)
- 0.4-0.6: Neutral (balanced or unclear outlook)
- 0.6-0.8: Bullish (positive outlook)
- 0.8-1.0: Extremely bullish (strong positive outlook)
3. Provide detailed rationale for your sentiment score by considering:
- Market indicators and metrics mentioned
- Expert opinions and quotes
- Historical comparisons
- Industry trends and context
- Risk factors and potential challenges
- Growth opportunities and positive catalysts
- Overall market sentiment and broader economic factors
Your analysis should be:
- Objective and data-driven
- Based on factual information present in the content
- Free from personal bias or speculation
- Considering both explicit and implicit sentiment indicators
- Taking into account the broader market context
For each analysis, structure your response as a clear sentiment score backed by comprehensive reasoning that explains why you arrived at that specific rating.
"""


class ArticleExtractor:
    """Class to handle article content extraction and cleaning."""

    # Common financial news domains and their article content selectors
    DOMAIN_SELECTORS = {
        "seekingalpha.com": {"article": "div#SA-content"},
        "finance.yahoo.com": {"article": "div.caas-body"},
        "reuters.com": {"article": "div.article-body__content__17Yit"},
        "bloomberg.com": {"article": "div.body-content"},
        "marketwatch.com": {"article": "div.article__body"},
        # Add more domains and their selectors as needed
    }
    @staticmethod
    def get_domain(url: str) -> str:
        """Extract the domain from a URL, normalizing away a leading "www."."""
        netloc = urlparse(url).netloc.lower()
        # Strip "www." so lookups against DOMAIN_SELECTORS (which store bare
        # domains) still match.
        return netloc[4:] if netloc.startswith("www.") else netloc

    @staticmethod
    def clean_text(text: str) -> str:
        """Clean extracted text content."""
        # Remove extra whitespace
        text = re.sub(r"\s+", " ", text)
        # Remove special characters but keep basic punctuation
        text = re.sub(r"[^\w\s.,!?-]", "", text)
        # Remove multiple periods
        text = re.sub(r"\.{2,}", ".", text)
        return text.strip()

    @classmethod
    def extract_article_content(cls, html_content: str, domain: str) -> str:
        """Extract article content using domain-specific selectors."""
        soup = BeautifulSoup(html_content, "html.parser")

        # Remove unwanted elements
        for element in soup.find_all(
            ["script", "style", "nav", "header", "footer", "iframe"]
        ):
            element.decompose()

        # Try domain-specific selector first
        if domain in cls.DOMAIN_SELECTORS:
            selector = cls.DOMAIN_SELECTORS[domain]["article"]
            content = soup.select_one(selector)
            if content:
                return cls.clean_text(content.get_text())

        # Fallback to common article containers
        article_containers = [
            "article",
            '[role="article"]',
            ".article-content",
            ".post-content",
            ".entry-content",
            "#main-content",
        ]
        for container in article_containers:
            content = soup.select_one(container)
            if content:
                return cls.clean_text(content.get_text())

        # Last resort: extract all paragraph text
        paragraphs = soup.find_all("p")
        if paragraphs:
            return cls.clean_text(
                " ".join(p.get_text() for p in paragraphs)
            )

        return cls.clean_text(soup.get_text())
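

# Quick illustrative check (an addition, not part of the original script): for a
# domain with no configured selector, extraction falls back to the generic
# <article> container.
def _demo_article_extractor() -> None:
    sample_html = (
        "<html><body><article><p>Shares rallied after the company beat "
        "earnings estimates.</p></article></body></html>"
    )
    print(ArticleExtractor.extract_article_content(sample_html, "example.com"))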


def fetch_url_content(url: str) -> Dict[str, Any]:
    """
    Fetch and extract content from a financial news URL.

    Args:
        url (str): The URL of the financial news article

    Returns:
        Dict[str, Any]: Dictionary containing extracted content and metadata
    """
    try:
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        domain = ArticleExtractor.get_domain(url)
        content = ArticleExtractor.extract_article_content(
            response.text, domain
        )

        # Extract title if available
        soup = BeautifulSoup(response.text, "html.parser")
        title = soup.title.string if soup.title else None

        return {
            "title": title,
            "content": content,
            "domain": domain,
            "url": url,
            "status": "success",
        }
    except Exception as e:
        return {
            "content": f"Error fetching URL content: {str(e)}",
            "status": "error",
            "url": url,
        }
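
# Note: the success branch above returns "title", "content", "domain", "url",
# and "status"; the error branch only carries "content", "status", and "url",
# so callers should check "status" before reading the other keys.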


tools = [
    {
        "type": "function",
        "function": {
            "name": "analyze_sentiment",
            "description": "Analyze the sentiment of financial content and provide a bullish/bearish rating with rationale.",
            "parameters": {
                "type": "object",
                "properties": {
                    "sentiment_score": {
                        "type": "number",
                        "description": "A score from 0.0 (extremely bearish) to 1.0 (extremely bullish)",
                    },
                    "rationale": {
                        "type": "string",
                        "description": "Detailed explanation of the sentiment analysis",
                    },
                },
                "required": ["sentiment_score", "rationale"],
            },
        },
    }
]
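
# The schema above appears to follow the OpenAI-style function/tool-calling
# format, so the agent is expected to emit a `sentiment_score` and `rationale`
# pair matching the parameters declared for `analyze_sentiment`.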

# Initialize the agent
agent = Agent(
    agent_name="Financial-Sentiment-Analyst",
    agent_description="Expert financial sentiment analyzer that provides detailed bullish/bearish analysis of financial content",
    system_prompt=FINANCIAL_SENTIMENT_SYSTEM_PROMPT,
    max_loops=1,
    tools_list_dictionary=tools,
    output_type="final",
    model_name="gpt-4o",
)


def run_sentiment_agent(url: str) -> Dict[str, Any]:
    """
    Run the sentiment analysis agent on a given URL.

    Args:
        url (str): The URL of the financial content to analyze

    Returns:
        Dict[str, Any]: Dictionary containing sentiment analysis results
    """
    article_data = fetch_url_content(url)
    if article_data["status"] == "error":
        return {"error": article_data["content"], "status": "error"}

    prompt = f"""
    Analyze the following financial article:

    Title: {article_data.get('title', 'N/A')}
    Source: {article_data['domain']}
    URL: {article_data['url']}

    Content:
    {article_data['content']}

    Please provide a detailed sentiment analysis with a score and explanation.
    """
    return agent.run(prompt)


if __name__ == "__main__":
    url = "https://finance.yahoo.com/"
    result = run_sentiment_agent(url)
    print(result)
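    # Note: a direct article URL usually gives the extractor cleaner content to
    # score than the finance.yahoo.com homepage used above.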