import json
import sys
from pathlib import Path

import pandas as pd

# Put the project root (the parent of this file's directory) on the module
# search path so the project-local imports that follow can be resolved.
project_root = Path(__file__).resolve().parent.parent
sys.path += [str(project_root)]

# Assuming these are defined in your project structure
from config import DATA_DIR, logger  # Adjust import as needed
from src.embeddings import CustomHuggingFaceEmbeddings

# FAISS is an optional dependency: record whether the import succeeded so the
# FAISS part of the check can be skipped instead of crashing the script.
try:
    from langchain_community.vectorstores import FAISS
except ImportError:
    faiss_installed = False
    print("Warning: langchain_community or faiss not installed. Cannot check FAISS index.")
else:
    faiss_installed = True


def _print_parsed_lines(lines, indices) -> None:
    """Print the selected JSONL lines as parsed objects, flagging bad ones.

    Args:
        lines: All raw lines of the JSONL file.
        indices: Zero-based positions in ``lines`` to print.
    """
    for i in indices:
        try:
            print(json.loads(lines[i].strip()))
        except json.JSONDecodeError:
            # Report the 1-based line number so it matches editor numbering.
            print(f"  (Error parsing line {i + 1})")


def _check_paragraphs_csv(csv_path: Path) -> None:
    """Print the first and last three rows and the row count of a CSV file."""
    print(f"\n--- Checking {csv_path} ---")
    try:
        df = pd.read_csv(csv_path)
        print("First 3 rows:")
        print(df.head(3).to_string())
        print("\nLast 3 rows:")
        print(df.tail(3).to_string())
        print(f"Total rows: {len(df)}")
    except FileNotFoundError:
        print(f"Error: {csv_path} not found.")
    except Exception as e:
        # Best-effort diagnostic: report any parse/read failure and move on
        # to the remaining checks rather than aborting the whole run.
        print(f"Error reading {csv_path}: {e}")


def _check_questions_jsonl(jsonl_path: Path) -> None:
    """Print the line count and a head/tail preview of a JSONL file.

    Files with fewer than six lines are printed in full; otherwise the first
    three and last three lines are shown, so no line is ever printed twice.
    """
    print(f"\n--- Checking {jsonl_path} ---")
    try:
        with open(jsonl_path, "r", encoding="utf-8") as f:
            lines = f.readlines()
    except FileNotFoundError:
        print(f"Error: {jsonl_path} not found.")
        return
    except Exception as e:
        # Best-effort diagnostic (e.g. permission or decoding errors).
        print(f"Error reading {jsonl_path}: {e}")
        return

    num_lines = len(lines)
    print(f"Total lines: {num_lines}")
    if num_lines == 0:
        return

    if num_lines < 6:
        # The head and tail windows would overlap, so show everything once.
        print("\n(Less than 6 lines total, showing all)")
        _print_parsed_lines(lines, range(num_lines))
        return

    print("\nFirst 3 lines (parsed JSON):")
    _print_parsed_lines(lines, range(3))
    print("\nLast 3 lines (parsed JSON):")
    _print_parsed_lines(lines, range(num_lines - 3, num_lines))


def _check_faiss_index(index_dir: Path) -> None:
    """Load the FAISS index stored in ``index_dir`` and print its vector count."""
    print(f"\n--- Checking FAISS Index in {index_dir} ---")
    if not faiss_installed:
        print("Skipping FAISS check as required libraries are not installed.")
        return

    # FAISS loads from the directory containing index.faiss and index.pkl
    index_file = index_dir / "index.faiss"
    pkl_file = index_dir / "index.pkl"
    if not (index_file.exists() and pkl_file.exists()):
        print(f"Error: FAISS index files (index.faiss, index.pkl) not found in {index_dir}")
        return

    try:
        print("Initializing embeddings model for loading index...")
        embeddings = CustomHuggingFaceEmbeddings()
        print("Loading FAISS index...")
        # SECURITY: allow_dangerous_deserialization=True unpickles index.pkl,
        # which can execute arbitrary code. Only run this against index files
        # produced by this project, never against untrusted data.
        vectorstore = FAISS.load_local(str(index_dir), embeddings, allow_dangerous_deserialization=True)
        print("FAISS index loaded successfully.")
        # Access the underlying FAISS index object to get the total number of vectors
        print(f"Total vectors in index: {vectorstore.index.ntotal}")
    except Exception as e:
        print(f"Error loading or checking FAISS index from {index_dir}: {e}")
        # Imported lazily: only needed on the failure path.
        import traceback

        traceback.print_exc()


def check_output_files(processed_dir: Path) -> None:
    """Prints head and tail of key processed files and FAISS index info.

    Runs three independent checks and prints the results to stdout: the
    ``paragraphs.csv`` table, the ``questions.jsonl`` file, and the FAISS
    index (``index.faiss`` / ``index.pkl``). A failure in one check is
    reported and does not prevent the others from running; the closing
    "Check Complete" banner is always printed.

    Args:
        processed_dir: The path to the 'data/processed' directory. A plain
            string is accepted as well as a ``Path``.
    """
    processed_dir = Path(processed_dir)
    print("--- Checking Processed Files ---")

    # 1. Check paragraphs.csv
    _check_paragraphs_csv(processed_dir / "paragraphs.csv")

    # 2. Check questions.jsonl
    _check_questions_jsonl(processed_dir / "questions.jsonl")

    # 3. Check FAISS index
    _check_faiss_index(processed_dir)

    print("\n--- Check Complete ---")


if __name__ == "__main__":
    # An optional first command-line argument selects the directory to check.
    # Without it the default is "data/processed" relative to the current
    # working directory, i.e. the script is assumed to run from the project root.
    PROCESSED_PATH = Path(sys.argv[1]) if len(sys.argv) > 1 else Path("data/processed")
    check_output_files(PROCESSED_PATH)