"""Fetch top Hacker News headlines and summarize the linked articles.

Example of using a swarms ``Agent`` as a news summarizer: scrape the
Hacker News front page, pull the article text for the first few stories,
print short summaries, and persist them to ``news_summaries.txt``.
"""

from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup

from swarms import Agent
from swarms.prompts.summaries_prompts import SUMMARIZE_PROMPT

# Base URL used both for scraping and for resolving relative story links
# (HN internal stories such as "item?id=..." have no scheme/host).
HN_BASE_URL = "https://news.ycombinator.com"


def fetch_hackernews_headlines(limit: int = 5) -> list[dict]:
    """Fetch the top *limit* headlines from the Hacker News front page.

    Args:
        limit: Maximum number of headlines to return.

    Returns:
        A list of ``{"title": ..., "url": ...}`` dicts. URLs are always
        absolute: relative HN links are resolved against the site root so
        they can be fetched later by :func:`fetch_article_content`.

    Raises:
        requests.HTTPError: If the front page request fails.
    """
    # timeout so a stalled connection cannot hang the whole script
    resp = requests.get(HN_BASE_URL, timeout=10)
    resp.raise_for_status()
    soup = BeautifulSoup(resp.text, "html.parser")
    headlines = []
    for item in soup.select("tr.athing")[:limit]:
        link = item.select_one("span.titleline a")
        if link:
            headlines.append(
                {
                    "title": link.get_text(),
                    # Resolve relative links (e.g. "item?id=123") to
                    # absolute URLs; absolute hrefs pass through unchanged.
                    "url": urljoin(HN_BASE_URL + "/", link["href"]),
                }
            )
    return headlines


def fetch_article_content(url: str) -> str:
    """Pull the paragraph text content from an article URL.

    Best-effort: returns an empty string when the article cannot be
    fetched, so one dead link does not abort the whole run.
    """
    try:
        res = requests.get(url, timeout=10)
        res.raise_for_status()
    # Only the requests calls can raise here; catch their exception
    # hierarchy instead of a blanket Exception that would hide bugs.
    except requests.RequestException:
        return ""
    soup = BeautifulSoup(res.text, "html.parser")
    # Strip boilerplate elements that would pollute the summary input.
    for tag in soup(["script", "style", "nav", "header", "footer"]):
        tag.decompose()
    text = " ".join(p.get_text() for p in soup.find_all("p"))
    return text.strip()


# Single-shot summarization agent; one loop is enough for a plain summary.
summarizer = Agent(
    agent_name="News-Summarizer",
    system_prompt="You summarize news articles succinctly.",
    max_loops=1,
    model_name="gpt-4o-mini",
)


def summarize_article(text: str) -> str:
    """Summarize *text* with the shared summarizer agent."""
    prompt = f"{SUMMARIZE_PROMPT}\n\n{text}"
    return summarizer.run(prompt)


if __name__ == "__main__":
    headlines = fetch_hackernews_headlines()
    print("Top Headlines:\n")
    for idx, headline in enumerate(headlines, 1):
        print(f"{idx}. {headline['title']}")

    summaries = []
    for article in headlines[:2]:
        content = fetch_article_content(article["url"])
        # Skip articles whose text could not be retrieved instead of
        # asking the model to summarize an empty string.
        if not content:
            continue
        summary = summarize_article(content)
        summaries.append({"title": article["title"], "summary": summary})

    print("\nArticle Summaries:\n")
    for s in summaries:
        print(f"{s['title']}\n{s['summary']}\n")

    # Explicit encoding so non-ASCII headlines don't crash on platforms
    # whose default text encoding is not UTF-8 (e.g. Windows cp1252).
    with open("news_summaries.txt", "w", encoding="utf-8") as f:
        for s in summaries:
            f.write(f"{s['title']}\n{s['summary']}\n\n")