swarms/apps/paper_swarm.py

"""
Paper Swarm
1. Scrape https://huggingface.co/papers for all papers: find every link on the page that contains /papers/, follow it, and extract the header, the abstract,
and various links, then write them to a txt file for each paper on https://huggingface.co/papers

2. Feed prompts iteratively into Anthropic for summarizations + a value score on impact, reliability, and novelty, plus other paper-ranking mechanisms

3. Store papers in a database with metadata. Agents can use retrieval

4. Discord Bot // Twitter Bot
"""


import requests
from bs4 import BeautifulSoup
import os

class Paper:
    """Plain record holding the fields scraped for a single paper."""

    def __init__(self, title, date, authors, abstract):
        """Store each argument, unchanged, on the attribute of the same name.

        Args:
            title: Value kept as ``self.title``.
            date: Value kept as ``self.date``.
            authors: Value kept as ``self.authors``.
            abstract: Value kept as ``self.abstract``.
        """
        # One tuple assignment; attributes are still created in the order
        # title, date, authors, abstract.
        self.title, self.date, self.authors, self.abstract = (
            title,
            date,
            authors,
            abstract,
        )

class Scraper:
    """Fetch a listing page and the individual paper pages it links to.

    Pages are downloaded with ``requests`` and parsed with BeautifulSoup's
    ``html.parser``.
    """

    def __init__(self, url, timeout=30):
        """Remember the listing page address and the request timeout.

        Args:
            url: Address of the listing page. Paper links found on that page
                are resolved against it.
            timeout: Seconds handed to ``requests.get`` for every request so
                that an unresponsive server cannot block the caller forever.
        """
        self.url = url
        self.timeout = timeout

    def resolve_link(self, link):
        """Return ``link`` as an absolute URL, resolved against ``self.url``.

        Plain concatenation (``self.url + link``) is wrong for site-relative
        hrefs: a base ending in ``/papers`` joined with ``/papers/<id>``
        yields ``/papers/papers/<id>``. ``urljoin`` applies the normal URL
        resolution rules and leaves already-absolute links untouched.

        Args:
            link: An href value as returned by ``get_paper_links``.

        Returns:
            The absolute URL as a string.
        """
        # Imported locally so this class does not depend on a change to the
        # file's import header.
        from urllib.parse import urljoin

        return urljoin(self.url, link)

    def get_paper_links(self):
        """Collect the hrefs on the listing page that contain ``/papers/``.

        Returns:
            A list of href strings in page order, each distinct value once.
        """
        response = requests.get(self.url, timeout=self.timeout)
        soup = BeautifulSoup(response.text, 'html.parser')
        hrefs = (
            a['href']
            for a in soup.find_all('a', href=True)
            if '/papers/' in a['href']
        )
        # dict.fromkeys drops repeated hrefs while keeping first-seen order,
        # so no paper is fetched and written more than once.
        return list(dict.fromkeys(hrefs))

    def get_paper_details(self, link):
        """Download one paper page and extract its fields.

        Args:
            link: An href value as returned by ``get_paper_links``.

        Returns:
            A ``Paper`` built from the first ``<h1>`` (title), the first
            ``<time>`` (date), every ``<span class="author">`` (authors) and
            the first ``<div class="abstract">`` (abstract). A missing
            element is replaced by a placeholder string; authors may be an
            empty list.
        """
        response = requests.get(self.resolve_link(link), timeout=self.timeout)
        soup = BeautifulSoup(response.text, 'html.parser')
        # A page without an <h1> used to raise AttributeError here; fall back
        # to a placeholder like the date and abstract already do.
        title_tag = soup.find('h1')
        title = title_tag.text if title_tag is not None else 'Untitled'
        date_tag = soup.find('time')
        date = date_tag.text if date_tag else 'Unknown'
        authors = [author.text for author in soup.find_all('span', class_='author')]
        abstract_tag = soup.find('div', class_='abstract')
        abstract = abstract_tag.text if abstract_tag else 'Abstract not found'
        return Paper(title, date, authors, abstract)

class FileWriter:
    """Write each paper to its own ``.txt`` file inside one directory."""

    def __init__(self, directory):
        """Remember the output directory.

        Args:
            directory: Folder that receives the files. It is created on the
                first write if it does not exist yet.
        """
        self.directory = directory

    def _filename_for(self, title):
        """Turn a paper title into a single safe file name ending in ``.txt``.

        Titles come from scraped HTML, so they may carry surrounding
        whitespace, newlines or path separators. Separators and control
        characters are replaced with ``_`` so the result always stays inside
        ``self.directory``; an empty result becomes ``untitled``.
        """
        cleaned = ''.join(
            '_' if ch in '/\\' or ord(ch) < 32 else ch
            for ch in title.strip()
        )
        return (cleaned or 'untitled') + '.txt'

    def write_paper(self, paper):
        """Write ``paper`` to ``<directory>/<sanitised title>.txt``.

        The file content is the title, date, one author per line and the
        abstract, each under a short label. An existing file with the same
        name is overwritten.

        Args:
            paper: Object exposing ``title``, ``date``, ``authors`` (iterable)
                and ``abstract``.
        """
        # The directory is not guaranteed to exist before the first write.
        os.makedirs(self.directory, exist_ok=True)
        path = os.path.join(self.directory, self._filename_for(paper.title))

        lines = [
            f"h1: {paper.title}\n",
            f"Published on {paper.date}\n",
            "Authors:\n",
        ]
        lines.extend(f"{author}\n" for author in paper.authors)
        lines.append("Abstract\n")
        lines.append(paper.abstract)

        # Explicit UTF-8: scraped text can hold characters the platform
        # default encoding is unable to represent.
        with open(path, 'w', encoding='utf-8') as f:
            f.writelines(lines)

# Script section: these statements sit at module level with no __main__ guard,
# so they run whenever the file is executed or imported.
scraper = Scraper('https://huggingface.co/papers')
# NOTE(review): the per-paper .txt files are written under 'images' — confirm
# this directory name is intended.
file_writer = FileWriter('images')

# Collect the paper hrefs from the listing page, then fetch each paper and
# write it to its own file, one at a time.
links = scraper.get_paper_links()
for link in links:
    paper = scraper.get_paper_details(link)
    file_writer.write_paper(paper)
exa error Former-commit-id: 4480ead6aaa10819f573d5b465a2dbfcf06a7899 1 year ago			`"""`
			`Paper Swarm`
			`1. Scrape https://huggingface.co/papers for all papers, by search for all links on the paper with a /papers/, then clicks, gets the header, and then the abstract.`
			`and various links and then adds them to a txt file for each paper on https://huggingface.co/papers`

			`2. Feed prompts iteratively into Anthropic for summarizations + value score on impact, reliability, and novel, and other paper ranking mechanisms`

			`3. Store papers in a database with metadata. Agents can use retrieval`

			`4. Discord Bot // Twitter Bot`
			`"""`


			`import requests`
			`from bs4 import BeautifulSoup`
			`import os`

			`class Paper:`
			`def __init__(self, title, date, authors, abstract):`
			`self.title = title`
			`self.date = date`
			`self.authors = authors`
			`self.abstract = abstract`

			`class Scraper:`
			`def __init__(self, url):`
			`self.url = url`

			`def get_paper_links(self):`
			`response = requests.get(self.url)`
			`soup = BeautifulSoup(response.text, 'html.parser')`
			`links = [a['href'] for a in soup.find_all('a', href=True) if '/papers/' in a['href']]`
			`return links`

			`def get_paper_details(self, link):`
			`response = requests.get(self.url + link)`
			`soup = BeautifulSoup(response.text, 'html.parser')`
			`title = soup.find('h1').text`
			`date_tag = soup.find('time')`
			`date = date_tag.text if date_tag else 'Unknown'`
			`authors = [author.text for author in soup.find_all('span', class_='author')]`
			`abstract_tag = soup.find('div', class_='abstract')`
			`abstract = abstract_tag.text if abstract_tag else 'Abstract not found'`
			`return Paper(title, date, authors, abstract)`

			`class FileWriter:`
			`def __init__(self, directory):`
			`self.directory = directory`

			`def write_paper(self, paper):`
			`with open(os.path.join(self.directory, paper.title + '.txt'), 'w') as f:`
			`f.write(f"h1: {paper.title}\n")`
			`f.write(f"Published on {paper.date}\n")`
			`f.write("Authors:\n")`
			`for author in paper.authors:`
			`f.write(f"{author}\n")`
			`f.write("Abstract\n")`
			`f.write(paper.abstract)`

			`scraper = Scraper('https://huggingface.co/papers')`
			`file_writer = FileWriter('images')`

			`links = scraper.get_paper_links()`
			`for link in links:`
			`paper = scraper.get_paper_details(link)`
			`file_writer.write_paper(paper)`