feat: Добавлена возможность сохранения всех полученных сообщений в БД pg

main
Artem-Darius Weber 6 days ago
parent 710b0c4353
commit 6faea9e894

@ -1,6 +1,12 @@
# Base image: slim Debian-based Python 3.9.
FROM python:3.9-slim
# Subsequent relative paths (COPY, RUN) resolve under /app.
WORKDIR /app
# Install gcc and libpq-dev, then remove the apt package lists in the same
# layer so they do not end up in the image.
# NOTE(review): gcc/libpq-dev are normally only needed to compile psycopg2
# from source; if the requirements install a prebuilt psycopg2-binary wheel,
# confirm whether this step is still required.
RUN apt-get update && apt-get install -y --no-install-recommends \
    gcc \
    libpq-dev \
    && rm -rf /var/lib/apt/lists/*
# Copy only the requirements file before installing, then install the Python
# dependencies without keeping pip's download cache.
COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

@ -5,4 +5,20 @@ services:
environment:
GROUP_ID: "group_id"
ACCESS_TOKEN: "access_token"
restart: unless-stopped
DATABASE_URL: "postgresql://postgres:password@db:5432/vkbot"
depends_on:
db
restart: unless-stopped
# PostgreSQL 15 service; its data directory is stored in the named volume
# db_data.
db:
  image: postgres:15
  environment:
    # NOTE(review): credentials are hard-coded; the user, password and
    # database name here mirror the ones embedded in the bot's DATABASE_URL
    # and must be kept in sync with it.
    POSTGRES_USER: "postgres"
    POSTGRES_PASSWORD: "password"
    POSTGRES_DB: "vkbot"
  volumes:
    - db_data:/var/lib/postgresql/data
  restart: always
volumes:
db_data:

@ -1,3 +1,4 @@
vk_api
transformers
torch
torch
psycopg2-binary

@ -4,18 +4,41 @@ from vk_api.bot_longpoll import VkBotLongPoll, VkBotEventType
import time
from transformers import AutoTokenizer, AutoModelForSequenceClassification
import torch
import os
import psycopg2
# Logging configuration
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# Spam classifier: the tokenizer and model are loaded once, at import time.
model_path = "RUSpam/spam_deberta_v4"
tokenizer = AutoTokenizer.from_pretrained(model_path)
model = AutoModelForSequenceClassification.from_pretrained(model_path)

# Runtime configuration comes from environment variables; each value is None
# when the variable is not set.
GROUP_ID = os.getenv("GROUP_ID")
ACCESS_TOKEN = os.getenv("ACCESS_TOKEN")
DATABASE_URL = os.getenv("DATABASE_URL")
THRESHOLD_REACTIONS = 3

# NOTE(review): the connection is opened at import time, i.e. before the
# ``__main__`` guard gets a chance to validate DATABASE_URL.
conn = psycopg2.connect(DATABASE_URL)
cursor = conn.cursor()

# Create the storage table on first start; the statement is a no-op when the
# table already exists.
cursor.execute("""
    CREATE TABLE IF NOT EXISTS messages (
        id SERIAL PRIMARY KEY,
        user_id BIGINT,
        peer_id BIGINT,
        message_id BIGINT,
        text TEXT,
        timestamp TIMESTAMP DEFAULT CURRENT_TIMESTAMP
    )
""")
conn.commit()
def save_message(user_id, peer_id, message_id, text):
    """Insert one received message into the ``messages`` table.

    Args:
        user_id: Value stored in the ``user_id`` column.
        peer_id: Value stored in the ``peer_id`` column.
        message_id: Value stored in the ``message_id`` column.
        text: Message text stored in the ``text`` column.

    Raises:
        Whatever ``cursor.execute`` or the commit raises; the transaction is
        rolled back before the exception propagates.
    """
    # ``with conn`` commits when the block succeeds and rolls back when it
    # raises, so a failed INSERT does not leave the shared connection stuck
    # in an aborted transaction that would make every later insert fail.
    with conn:
        cursor.execute(
            """
            INSERT INTO messages (user_id, peer_id, message_id, text)
            VALUES (%s, %s, %s, %s)
            """,
            (user_id, peer_id, message_id, text),
        )
def is_spam(message):
inputs = tokenizer(message, return_tensors="pt", truncation=True, max_length=256)
with torch.no_grad():
@ -24,14 +47,12 @@ def is_spam(message):
predicted_class = torch.argmax(logits, dim=1).item()
return predicted_class == 1
# Основной класс бота
class VkBot:
def __init__(self, group_id, token):
    """Set up the VK API session, API handle and long-poll listener.

    Args:
        group_id: Community id handed to ``VkBotLongPoll``.
        token: Access token handed to ``vk_api.VkApi``.
    """
    session = vk_api.VkApi(token=token)
    self.group_id = group_id
    self.vk_session = session
    self.vk = session.get_api()
    self.longpoll = VkBotLongPoll(session, group_id)
    # Starts empty; its consumers are not visible in this excerpt.
    self.message_reactions = dict()
def run(self):
logger.info("Бот запущен!")
@ -43,25 +64,27 @@ class VkBot:
message_id = message["conversation_message_id"]
text = message["text"]
save_message(user_id, peer_id, message_id, text)
if peer_id > 2_000_000_000 and is_spam(text):
logger.warning(f"Обнаружено спам-сообщение от пользователя {user_id}: {text}")
self.delete_message(peer_id=peer_id, user_id=user_id, message_id=message_id)
def delete_message(self, peer_id, user_id, message_id):
    """Delete a conversation message unless its author is a chat admin.

    Args:
        peer_id: Conversation the message belongs to.
        user_id: Author of the message; checked for admin status first.
        message_id: Conversation message id passed to the API as ``cmids``.
    """
    author_is_admin = self.is_conservation_admin(peer_id=peer_id, user_id=user_id)
    if author_is_admin:
        # Messages written by conversation admins are left untouched.
        return
    self.vk.messages.delete(cmids=message_id, peer_id=peer_id)
    logger.info(f"Сообщение {message_id} удалено из беседы {peer_id} пользователем {user_id}.")
def is_conservation_admin(self, peer_id, user_id):
    """Check whether a user is an administrator of a conversation.

    Looks the user up in the ``items`` list returned by
    ``messages.getConversationMembers`` for ``peer_id``.

    Args:
        peer_id: Conversation to inspect.
        user_id: Member id to look for.

    Returns:
        True when the matching member entry carries a truthy ``is_admin``
        flag; False when the flag is missing or falsy, or when the user is
        not listed among the members.
    """
    members = self.vk.messages.getConversationMembers(peer_id=peer_id)
    for member in members['items']:
        if member['member_id'] == user_id:
            # Entries for ordinary members may not carry the key at all;
            # a missing flag means "not an admin" rather than an error.
            return bool(member.get('is_admin', False))
    return False
# Bot entry point
if __name__ == "__main__":
    # All three settings are required; report the problem and stop with a
    # non-zero status when any of them is missing or empty.
    if not GROUP_ID or not ACCESS_TOKEN or not DATABASE_URL:
        logger.error("GROUP_ID, ACCESS_TOKEN и DATABASE_URL должны быть заданы в переменных окружения!")
        # ``exit()`` is a helper injected by the ``site`` module for the
        # interactive shell and is not guaranteed to exist (e.g. ``python -S``);
        # raising SystemExit terminates with the same exit status.
        raise SystemExit(1)
    bot = VkBot(GROUP_ID, ACCESS_TOKEN)
    bot.run()
Loading…
Cancel
Save