Merge pull request #4 from ascender1729/implement-news-aggregator-and-summarizer-agent
Add news aggregator summarizer examplepull/863/head
commit
95d3672ce2
@ -0,0 +1,65 @@
|
|||||||
|
import requests
|
||||||
|
from bs4 import BeautifulSoup
|
||||||
|
from swarms import Agent
|
||||||
|
from swarms.prompts.summaries_prompts import SUMMARIZE_PROMPT
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_hackernews_headlines(limit: int = 5):
|
||||||
|
"""Fetch top headlines from Hacker News."""
|
||||||
|
resp = requests.get("https://news.ycombinator.com")
|
||||||
|
resp.raise_for_status()
|
||||||
|
soup = BeautifulSoup(resp.text, "html.parser")
|
||||||
|
headlines = []
|
||||||
|
for item in soup.select("tr.athing")[:limit]:
|
||||||
|
link = item.select_one("span.titleline a")
|
||||||
|
if link:
|
||||||
|
headlines.append({"title": link.get_text(), "url": link["href"]})
|
||||||
|
return headlines
|
||||||
|
|
||||||
|
|
||||||
|
def fetch_article_content(url: str) -> str:
|
||||||
|
"""Pull text content from an article URL."""
|
||||||
|
try:
|
||||||
|
res = requests.get(url, timeout=10)
|
||||||
|
res.raise_for_status()
|
||||||
|
except Exception:
|
||||||
|
return ""
|
||||||
|
soup = BeautifulSoup(res.text, "html.parser")
|
||||||
|
for tag in soup(["script", "style", "nav", "header", "footer"]):
|
||||||
|
tag.decompose()
|
||||||
|
text = " ".join(p.get_text() for p in soup.find_all("p"))
|
||||||
|
return text.strip()
|
||||||
|
|
||||||
|
|
||||||
|
summarizer = Agent(
|
||||||
|
agent_name="News-Summarizer",
|
||||||
|
system_prompt="You summarize news articles succinctly.",
|
||||||
|
max_loops=1,
|
||||||
|
model_name="gpt-4o-mini",
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
def summarize_article(text: str) -> str:
|
||||||
|
prompt = f"{SUMMARIZE_PROMPT}\n\n{text}"
|
||||||
|
return summarizer.run(prompt)
|
||||||
|
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
headlines = fetch_hackernews_headlines()
|
||||||
|
print("Top Headlines:\n")
|
||||||
|
for idx, headline in enumerate(headlines, 1):
|
||||||
|
print(f"{idx}. {headline['title']}")
|
||||||
|
|
||||||
|
summaries = []
|
||||||
|
for article in headlines[:2]:
|
||||||
|
content = fetch_article_content(article["url"])
|
||||||
|
summary = summarize_article(content)
|
||||||
|
summaries.append({"title": article["title"], "summary": summary})
|
||||||
|
|
||||||
|
print("\nArticle Summaries:\n")
|
||||||
|
for s in summaries:
|
||||||
|
print(f"{s['title']}\n{s['summary']}\n")
|
||||||
|
|
||||||
|
with open("news_summaries.txt", "w") as f:
|
||||||
|
for s in summaries:
|
||||||
|
f.write(f"{s['title']}\n{s['summary']}\n\n")
|
Loading…
Reference in new issue