# swarms/sentiment_news_analysis.py

# pip install swarms beautifulsoup4 requests python-dotenv
import re
from typing import Any, Dict
from urllib.parse import urlparse

import requests
from bs4 import BeautifulSoup
from dotenv import load_dotenv
from swarms import Agent

load_dotenv()

# Custom system prompt for financial sentiment analysis
FINANCIAL_SENTIMENT_SYSTEM_PROMPT = """
You are an expert financial analyst specializing in sentiment analysis of financial news and content. Your task is to:
1. Analyze financial content for bullish or bearish sentiment
2. Provide a numerical sentiment score between 0.0 (extremely bearish) and 1.0 (extremely bullish) where:
- 0.0-0.2: Extremely bearish (strong negative outlook)
- 0.2-0.4: Bearish (negative outlook)
- 0.4-0.6: Neutral (balanced or unclear outlook)
- 0.6-0.8: Bullish (positive outlook)
- 0.8-1.0: Extremely bullish (strong positive outlook)
3. Provide detailed rationale for your sentiment score by considering:
- Market indicators and metrics mentioned
- Expert opinions and quotes
- Historical comparisons
- Industry trends and context
- Risk factors and potential challenges
- Growth opportunities and positive catalysts
- Overall market sentiment and broader economic factors
Your analysis should be:
- Objective and data-driven
- Based on factual information present in the content
- Free from personal bias or speculation
- Considering both explicit and implicit sentiment indicators
- Taking into account the broader market context
For each analysis, structure your response as a clear sentiment score backed by comprehensive reasoning that explains why you arrived at that specific rating.
"""


class ArticleExtractor:
    """Class to handle article content extraction and cleaning."""

    # Common financial news domains and their article content selectors
    DOMAIN_SELECTORS = {
        "seekingalpha.com": {"article": "div#SA-content"},
        "finance.yahoo.com": {"article": "div.caas-body"},
        "reuters.com": {"article": "div.article-body__content__17Yit"},
        "bloomberg.com": {"article": "div.body-content"},
        "marketwatch.com": {"article": "div.article__body"},
        # Add more domains and their selectors as needed
    }
    @staticmethod
    def get_domain(url: str) -> str:
        """Extract the domain from a URL, normalizing away a leading "www."."""
        netloc = urlparse(url).netloc.lower()
        # Strip "www." so lookups against DOMAIN_SELECTORS (which store bare
        # domains) still match.
        return netloc[4:] if netloc.startswith("www.") else netloc

    @staticmethod
    def clean_text(text: str) -> str:
        """Clean extracted text content."""
        # Remove extra whitespace
        text = re.sub(r"\s+", " ", text)
        # Remove special characters but keep basic punctuation
        text = re.sub(r"[^\w\s.,!?-]", "", text)
        # Remove multiple periods
        text = re.sub(r"\.{2,}", ".", text)
        return text.strip()

    @classmethod
    def extract_article_content(cls, html_content: str, domain: str) -> str:
        """Extract article content using domain-specific selectors."""
        soup = BeautifulSoup(html_content, "html.parser")

        # Remove unwanted elements
        for element in soup.find_all(
            ["script", "style", "nav", "header", "footer", "iframe"]
        ):
            element.decompose()

        # Try domain-specific selector first
        if domain in cls.DOMAIN_SELECTORS:
            selector = cls.DOMAIN_SELECTORS[domain]["article"]
            content = soup.select_one(selector)
            if content:
                return cls.clean_text(content.get_text())

        # Fallback to common article containers
        article_containers = [
            "article",
            '[role="article"]',
            ".article-content",
            ".post-content",
            ".entry-content",
            "#main-content",
        ]
        for container in article_containers:
            content = soup.select_one(container)
            if content:
                return cls.clean_text(content.get_text())

        # Last resort: extract all paragraph text
        paragraphs = soup.find_all("p")
        if paragraphs:
            return cls.clean_text(
                " ".join(p.get_text() for p in paragraphs)
            )

        return cls.clean_text(soup.get_text())
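

# Quick illustrative check (an addition, not part of the original script): for a
# domain with no configured selector, extraction falls back to the generic
# <article> container.
def _demo_article_extractor() -> None:
    sample_html = (
        "<html><body><article><p>Shares rallied after the company beat "
        "earnings estimates.</p></article></body></html>"
    )
    print(ArticleExtractor.extract_article_content(sample_html, "example.com"))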


def fetch_url_content(url: str) -> Dict[str, Any]:
    """
    Fetch and extract content from a financial news URL.

    Args:
        url (str): The URL of the financial news article

    Returns:
        Dict[str, Any]: Dictionary containing extracted content and metadata
    """
    try:
        headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }
        response = requests.get(url, headers=headers, timeout=10)
        response.raise_for_status()

        domain = ArticleExtractor.get_domain(url)
        content = ArticleExtractor.extract_article_content(
            response.text, domain
        )

        # Extract title if available
        soup = BeautifulSoup(response.text, "html.parser")
        title = soup.title.string if soup.title else None

        return {
            "title": title,
            "content": content,
            "domain": domain,
            "url": url,
            "status": "success",
        }
    except Exception as e:
        return {
            "content": f"Error fetching URL content: {str(e)}",
            "status": "error",
            "url": url,
        }
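
# Note: the success branch above returns "title", "content", "domain", "url",
# and "status"; the error branch only carries "content", "status", and "url",
# so callers should check "status" before reading the other keys.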


tools = [
    {
        "type": "function",
        "function": {
            "name": "analyze_sentiment",
            "description": "Analyze the sentiment of financial content and provide a bullish/bearish rating with rationale.",
            "parameters": {
                "type": "object",
                "properties": {
                    "sentiment_score": {
                        "type": "number",
                        "description": "A score from 0.0 (extremely bearish) to 1.0 (extremely bullish)",
                    },
                    "rationale": {
                        "type": "string",
                        "description": "Detailed explanation of the sentiment analysis",
                    },
                },
                "required": ["sentiment_score", "rationale"],
            },
        },
    }
]
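
# The schema above appears to follow the OpenAI-style function/tool-calling
# format, so the agent is expected to emit a `sentiment_score` and `rationale`
# pair matching the parameters declared for `analyze_sentiment`.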

# Initialize the agent
agent = Agent(
    agent_name="Financial-Sentiment-Analyst",
    agent_description="Expert financial sentiment analyzer that provides detailed bullish/bearish analysis of financial content",
    system_prompt=FINANCIAL_SENTIMENT_SYSTEM_PROMPT,
    max_loops=1,
    tools_list_dictionary=tools,
    output_type="final",
    model_name="gpt-4o",
)


def run_sentiment_agent(url: str) -> Dict[str, Any]:
    """
    Run the sentiment analysis agent on a given URL.

    Args:
        url (str): The URL of the financial content to analyze

    Returns:
        Dict[str, Any]: Dictionary containing sentiment analysis results
    """
    article_data = fetch_url_content(url)
    if article_data["status"] == "error":
        return {"error": article_data["content"], "status": "error"}

    prompt = f"""
    Analyze the following financial article:

    Title: {article_data.get('title', 'N/A')}
    Source: {article_data['domain']}
    URL: {article_data['url']}

    Content:
    {article_data['content']}

    Please provide a detailed sentiment analysis with a score and explanation.
    """
    return agent.run(prompt)


if __name__ == "__main__":
    url = "https://finance.yahoo.com/"
    result = run_sentiment_agent(url)
    print(result)
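    # Note: a direct article URL usually gives the extractor cleaner content to
    # score than the finance.yahoo.com homepage used above.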