diff --git a/Dockerfile b/Dockerfile index db29e59..551368d 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,12 @@ FROM python:3.9-slim WORKDIR /app + +RUN apt-get update && apt-get install -y --no-install-recommends \ + gcc \ + libpq-dev \ + && rm -rf /var/lib/apt/lists/* + COPY requirements.txt . RUN pip install --no-cache-dir -r requirements.txt diff --git a/compose.yaml b/compose.yaml index 7b9a946..64d31f8 100644 --- a/compose.yaml +++ b/compose.yaml @@ -5,4 +5,20 @@ services: environment: GROUP_ID: "group_id" ACCESS_TOKEN: "access_token" - restart: unless-stopped \ No newline at end of file + DATABASE_URL: "postgresql://postgres:password@db:5432/vkbot" + depends_on: + - db + restart: unless-stopped + + db: + image: postgres:15 + environment: + POSTGRES_USER: "postgres" + POSTGRES_PASSWORD: "password" + POSTGRES_DB: "vkbot" + volumes: + - db_data:/var/lib/postgresql/data + restart: always + +volumes: + db_data: \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 1a0e494..4f19716 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ vk_api transformers -torch \ No newline at end of file +torch +psycopg2-binary \ No newline at end of file diff --git a/src/main.py b/src/main.py index cd3a07a..a85f602 100644 --- a/src/main.py +++ b/src/main.py @@ -4,18 +4,41 @@ from vk_api.bot_longpoll import VkBotLongPoll, VkBotEventType import time from transformers import AutoTokenizer, AutoModelForSequenceClassification import torch +import os +import psycopg2 -# Настройка логирования logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s') logger = logging.getLogger(__name__) model_path = "RUSpam/spam_deberta_v4" tokenizer = AutoTokenizer.from_pretrained(model_path) model = AutoModelForSequenceClassification.from_pretrained(model_path) -GROUP_ID = "" -ACCESS_TOKEN = "" +GROUP_ID = os.getenv("GROUP_ID") +ACCESS_TOKEN = os.getenv("ACCESS_TOKEN") +DATABASE_URL = os.getenv("DATABASE_URL") 
THRESHOLD_REACTIONS = 3 +conn = psycopg2.connect(DATABASE_URL) +cursor = conn.cursor() +cursor.execute(""" +CREATE TABLE IF NOT EXISTS messages ( + id SERIAL PRIMARY KEY, + user_id BIGINT, + peer_id BIGINT, + message_id BIGINT, + text TEXT, + timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP +) +""") +conn.commit() + +def save_message(user_id, peer_id, message_id, text): + cursor.execute(""" + INSERT INTO messages (user_id, peer_id, message_id, text) + VALUES (%s, %s, %s, %s) + """, (user_id, peer_id, message_id, text)) + conn.commit() + def is_spam(message): inputs = tokenizer(message, return_tensors="pt", truncation=True, max_length=256) with torch.no_grad(): @@ -24,14 +47,12 @@ def is_spam(message): predicted_class = torch.argmax(logits, dim=1).item() return predicted_class == 1 -# Основной класс бота class VkBot: def __init__(self, group_id, token): self.group_id = group_id self.vk_session = vk_api.VkApi(token=token) self.vk = self.vk_session.get_api() self.longpoll = VkBotLongPoll(self.vk_session, group_id) - self.message_reactions = {} def run(self): logger.info("Бот запущен!") @@ -43,25 +64,27 @@ class VkBot: message_id = message["conversation_message_id"] text = message["text"] + save_message(user_id, peer_id, message_id, text) + if peer_id > 2_000_000_000 and is_spam(text): logger.warning(f"Обнаружено спам-сообщение от пользователя {user_id}: {text}") self.delete_message(peer_id=peer_id, user_id=user_id, message_id=message_id) def delete_message(self, peer_id, user_id, message_id): - """Удалить сообщение""" if not self.is_conservation_admin(peer_id=peer_id, user_id=user_id): self.vk.messages.delete(cmids=message_id, peer_id=peer_id) logger.info(f"Сообщение {message_id} удалено из беседы {peer_id} пользователем {user_id}.") def is_conservation_admin(self, peer_id, user_id): - """Проверка пользователя на администратора беседы.""" members = self.vk.messages.getConversationMembers(peer_id=peer_id) for member in members['items']: if member['member_id'] == user_id: 
return member['is_admin'] return False -# Запуск бота if __name__ == "__main__": + if not GROUP_ID or not ACCESS_TOKEN or not DATABASE_URL: + logger.error("GROUP_ID, ACCESS_TOKEN и DATABASE_URL должны быть заданы в переменных окружения!") + exit(1) bot = VkBot(GROUP_ID, ACCESS_TOKEN) bot.run() \ No newline at end of file