"""Paper Swarm.

Planned pipeline:

1. Scrape https://huggingface.co/papers for all papers: search the page for
   every link containing ``/papers/``, follow it, get the header and then
   the abstract and various links, and add them to a txt file for each
   paper on https://huggingface.co/papers.
2. Feed prompts iteratively into Anthropic for summarizations plus a value
   score on impact, reliability, and novelty, and other paper-ranking
   mechanisms.
3. Store papers in a database with metadata. Agents can use retrieval.
4. Discord bot / Twitter bot.

The code in this file covers step 1 only.
"""
import os
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup


class Paper:
    """Plain record holding the fields scraped from one paper page."""

    def __init__(self, title, date, authors, abstract):
        """Store the scraped fields unchanged.

        Args:
            title: Text of the page's first ``<h1>``.
            date: Text of the page's first ``<time>`` tag, or a placeholder.
            authors: List of author name strings (may be empty).
            abstract: Abstract text, or a placeholder.
        """
        self.title = title
        self.date = date
        self.authors = authors
        self.abstract = abstract


class Scraper:
    """Fetch a paper listing page and the individual paper pages it links to."""

    def __init__(self, url, timeout=30):
        """Remember the listing URL and the per-request timeout.

        Args:
            url: Address of the listing page; also the base against which
                scraped hrefs are resolved.
            timeout: Seconds passed to ``requests.get`` as ``timeout``.
        """
        self.url = url
        self.timeout = timeout

    def _fetch_soup(self, url):
        """GET *url* and return the parsed HTML; raise on HTTP error status."""
        response = requests.get(url, timeout=self.timeout)
        # Fail loudly on 4xx/5xx instead of parsing an error page as a paper.
        response.raise_for_status()
        return BeautifulSoup(response.text, 'html.parser')

    def get_paper_links(self) -> list:
        """Return the distinct hrefs on the listing page containing ``/papers/``.

        Order of first appearance is kept; repeated hrefs are collapsed so
        each one is fetched only once by the caller.

        Raises:
            requests.RequestException: If the listing page cannot be fetched.
        """
        soup = self._fetch_soup(self.url)
        hrefs = (a['href'] for a in soup.find_all('a', href=True))
        # dict.fromkeys de-duplicates while preserving insertion order.
        return list(dict.fromkeys(h for h in hrefs if '/papers/' in h))

    def get_paper_details(self, link) -> Paper:
        """Fetch one paper page and extract its fields.

        Args:
            link: An href as returned by ``get_paper_links``; relative or
                absolute.

        Returns:
            A ``Paper``. Missing title, date or abstract are replaced by
            placeholder strings; missing authors give an empty list.

        Raises:
            requests.RequestException: If the page cannot be fetched.
        """
        # Resolve against the base URL: plain concatenation would turn
        # ".../papers" + "/papers/<id>" into ".../papers/papers/<id>".
        soup = self._fetch_soup(urljoin(self.url, link))

        title_tag = soup.find('h1')
        title = title_tag.text if title_tag else 'Title not found'
        date_tag = soup.find('time')
        date = date_tag.text if date_tag else 'Unknown'
        authors = [author.text for author in soup.find_all('span', class_='author')]
        abstract_tag = soup.find('div', class_='abstract')
        abstract = abstract_tag.text if abstract_tag else 'Abstract not found'
        return Paper(title, date, authors, abstract)


class FileWriter:
    """Write ``Paper`` records as text files inside one directory."""

    # Characters that are path separators or are rejected by common filesystems.
    _UNSAFE_FILENAME_CHARS = '\\/:*?"<>|'

    def __init__(self, directory):
        """Remember the output directory (created lazily on first write)."""
        self.directory = directory

    @classmethod
    def _safe_filename(cls, title):
        """Turn a scraped title into a single, filesystem-safe path component."""
        # Collapse newlines/tabs/runs of spaces that scraped headings often carry.
        collapsed = ' '.join(title.split())
        cleaned = ''.join(
            '_' if ch in cls._UNSAFE_FILENAME_CHARS or ord(ch) < 32 else ch
            for ch in collapsed
        )
        # Leading/trailing dots and spaces are problematic on Windows and
        # would let a title like ".." escape the output directory.
        return cleaned.strip(' .') or 'untitled'

    def write_paper(self, paper) -> None:
        """Write *paper* to ``<directory>/<sanitised title>.txt`` as UTF-8.

        The directory is created if it does not exist. An existing file
        with the same name is overwritten.
        """
        os.makedirs(self.directory, exist_ok=True)
        path = os.path.join(self.directory, self._safe_filename(paper.title) + '.txt')
        with open(path, 'w', encoding='utf-8') as f:
            f.write(f"h1: {paper.title}\n")
            f.write(f"Published on {paper.date}\n")
            f.write("Authors:\n")
            for author in paper.authors:
                f.write(f"{author}\n")
            f.write("Abstract\n")
            f.write(paper.abstract)


# NOTE(review): this runs on import as well as on direct execution; consider
# moving it under an ``if __name__ == "__main__":`` guard.
scraper = Scraper('https://huggingface.co/papers')
file_writer = FileWriter('images')
links = scraper.get_paper_links()
for link in links:
    try:
        paper = scraper.get_paper_details(link)
    except requests.RequestException as exc:
        # One unreachable page should not abort the whole scrape.
        print(f"Skipping {link}: {exc}")
        continue
    file_writer.write_paper(paper)