"""
Paper Swarm
1. Scrape https://huggingface.co/papers for all papers, by search for all links on the paper with a /papers/, then clicks, gets the header, and then the abstract.
and various links and then adds them to a txt file for each paper on https://huggingface.co/papers

2. Feed prompts iteratively into Anthropic for summarizations + value score on impact, reliability, and novel, and other paper ranking mechanisms

3. Store papers in a database with metadata. Agents can use retrieval

4. Discord Bot // Twitter Bot
"""


import os
import re
from urllib.parse import urljoin

import requests
from bs4 import BeautifulSoup


class Paper:
    """Container for the metadata scraped from a single paper page."""

    def __init__(self, title, date, authors, abstract):
        self.title = title
        self.date = date
        self.authors = authors
        self.abstract = abstract


class Scraper:
    def __init__(self, url):
        self.url = url

    def get_paper_links(self):
        response = requests.get(self.url)
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        links = [
            a["href"] for a in soup.find_all("a", href=True) if "/papers/" in a["href"]
        ]
        # The index links to each paper more than once, so deduplicate
        # while preserving first-seen order.
        return list(dict.fromkeys(links))

    def get_paper_details(self, link):
        # The scraped hrefs are site-absolute paths ("/papers/<id>"), so join
        # them against the base URL; naive concatenation with self.url would
        # produce ".../papers/papers/<id>".
        response = requests.get(urljoin(self.url, link))
        response.raise_for_status()
        soup = BeautifulSoup(response.text, "html.parser")
        title = soup.find("h1").text.strip()
        date_tag = soup.find("time")
        date = date_tag.text.strip() if date_tag else "Unknown"
        authors = [
            a.text.strip() for a in soup.find_all("span", class_="author")
        ]
        abstract_tag = soup.find("div", class_="abstract")
        abstract = abstract_tag.text.strip() if abstract_tag else "Abstract not found"
        return Paper(title, date, authors, abstract)
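

# Step 2 sketch: Anthropic summarization plus a value score on impact,
# reliability, and novelty. A minimal, hypothetical sketch assuming the
# official `anthropic` SDK is installed and ANTHROPIC_API_KEY is set in the
# environment; the model name and the scoring rubric in the prompt are
# placeholders, not part of the original script.
class Summarizer:
    def __init__(self, model="claude-3-5-sonnet-20241022"):
        # Import lazily so the scraper still runs without the SDK installed.
        import anthropic

        self.client = anthropic.Anthropic()  # reads ANTHROPIC_API_KEY
        self.model = model

    def summarize(self, paper):
        prompt = (
            "Summarize the following paper and give it a 1-10 value score on "
            "impact, reliability, and novelty.\n\n"
            f"Title: {paper.title}\n\nAbstract: {paper.abstract}"
        )
        message = self.client.messages.create(
            model=self.model,
            max_tokens=512,
            messages=[{"role": "user", "content": prompt}],
        )
        return message.content[0].text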


class FileWriter:
    def __init__(self, directory):
        self.directory = directory
        os.makedirs(self.directory, exist_ok=True)

    def write_paper(self, paper):
        # Paper titles can contain characters that are invalid in filenames.
        filename = re.sub(r'[\\/*?:"<>|]', "_", paper.title) + ".txt"
        with open(os.path.join(self.directory, filename), "w", encoding="utf-8") as f:
            f.write(f"Title: {paper.title}\n")
            f.write(f"Published on {paper.date}\n")
            f.write("Authors:\n")
            for author in paper.authors:
                f.write(f"{author}\n")
            f.write("Abstract:\n")
            f.write(paper.abstract)
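

# Step 3 sketch: persist papers with metadata so agents can retrieve them.
# A minimal sketch using the stdlib sqlite3 module; the schema and the
# LIKE-based keyword search are assumptions, not part of the original script.
class PaperDatabase:
    def __init__(self, path="papers.db"):
        import sqlite3

        self.conn = sqlite3.connect(path)
        self.conn.execute(
            "CREATE TABLE IF NOT EXISTS papers "
            "(title TEXT PRIMARY KEY, date TEXT, authors TEXT, abstract TEXT)"
        )
        self.conn.commit()

    def add_paper(self, paper):
        self.conn.execute(
            "INSERT OR REPLACE INTO papers VALUES (?, ?, ?, ?)",
            (paper.title, paper.date, ", ".join(paper.authors), paper.abstract),
        )
        self.conn.commit()

    def search(self, keyword):
        # Naive keyword retrieval; a real agent stack would more likely use
        # SQLite FTS or embeddings.
        cursor = self.conn.execute(
            "SELECT title, abstract FROM papers WHERE abstract LIKE ?",
            (f"%{keyword}%",),
        )
        return cursor.fetchall()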


scraper = Scraper("https://huggingface.co/papers")
file_writer = FileWriter("papers")

links = scraper.get_paper_links()
for link in links:
    try:
        paper = scraper.get_paper_details(link)
        file_writer.write_paper(paper)
    except requests.RequestException as e:
        # One unreachable paper page shouldn't abort the whole run.
        print(f"Skipping {link}: {e}")
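

# Step 4 sketch: Discord bot. A minimal, hypothetical sketch assuming the
# `discord.py` library; the "!papers <keyword>" command, the
# DISCORD_BOT_TOKEN environment variable, and the wiring to PaperDatabase
# are illustrative assumptions. Left commented out so the scraper above
# still runs as a plain script.
#
# import discord
#
# intents = discord.Intents.default()
# intents.message_content = True
# client = discord.Client(intents=intents)
# db = PaperDatabase()
#
# @client.event
# async def on_message(message):
#     if message.author == client.user:
#         return
#     if message.content.startswith("!papers "):
#         keyword = message.content.removeprefix("!papers ")
#         hits = db.search(keyword)[:5]
#         reply = "\n".join(title for title, _ in hits) or "No matches found."
#         await message.channel.send(reply)
#
# client.run(os.environ["DISCORD_BOT_TOKEN"])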