diff --git a/docker-compose.yaml b/docker-compose.yaml index e754039..f8cce3c 100644 --- a/docker-compose.yaml +++ b/docker-compose.yaml @@ -7,21 +7,41 @@ services: ports: - "5000:5000" volumes: - - .:/app + - ./web:/app/web + - /var/run/docker.sock:/var/run/docker.sock + - type: bind + source: ./tg_nodes.log + target: /app/tg_nodes.log + read_only: true + environment: + - PYTHONUNBUFFERED=1 + privileged: true + restart: unless-stopped command: python web/app.py - depends_on: - - tg_node_0 - - tg_node_1 tg_node_0: build: . + volumes: + - type: bind + source: ./tg_nodes.log + target: /app/tg_nodes.log + environment: + - PYTHONUNBUFFERED=1 container_name: tg-node-0 - command: python tg/node_tg_0.py + restart: unless-stopped + command: python tg/tg_node_0.py tg_node_1: build: . + volumes: + - type: bind + source: ./tg_nodes.log + target: /app/tg_nodes.log + environment: + - PYTHONUNBUFFERED=1 container_name: tg-node-1 - command: python tg/node_tg_1.py + restart: unless-stopped + command: python tg/tg_node_1.py volumes: - pg_data: + pg_data: \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index aafe21d..82c694b 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,4 +7,5 @@ selenium grpc_interceptor_headers telethon schedule -psycopg2-binary \ No newline at end of file +psycopg2-binary +docker \ No newline at end of file diff --git a/tg/tg_crawler.py b/tg/tg_crawler.py index eefe1df..9c568da 100644 --- a/tg/tg_crawler.py +++ b/tg/tg_crawler.py @@ -1,14 +1,18 @@ import logging +import os import psycopg2 from datetime import datetime from telethon.sync import TelegramClient from telethon.tl.functions.messages import GetHistoryRequest +LOG_FILE = os.getenv('LOG_FILE', '/app/tg_nodes.log') + +if os.path.exists(LOG_FILE) and os.path.isdir(LOG_FILE): + raise RuntimeError(f"Path {LOG_FILE} is a directory! Expected file.") + logging.basicConfig( - level=logging.INFO, - format='%(asctime)s - %(levelname)s - %(message)s', handlers=[ - logging.FileHandler('tg_nodes.log'), + logging.FileHandler(LOG_FILE), logging.StreamHandler() ] ) diff --git a/tg/tg_node_0.py b/tg/tg_node_0.py index 7693788..afa58ad 100644 --- a/tg/tg_node_0.py +++ b/tg/tg_node_0.py @@ -1,34 +1,47 @@ import asyncio import os -from apscheduler.schedulers.asyncio import AsyncIOScheduler +from apscheduler.schedulers.background import BackgroundScheduler from pytz import timezone from tg_crawler import TelegramChannelMonitor from dotenv import load_dotenv +import logging load_dotenv() -TelegramChannelMonitor.set_db_config({ - 'host': os.getenv("HOST"), - 'port': os.getenv("PORT"), - 'database': os.getenv("DBNAME"), - 'user': os.getenv("USER"), - 'password': os.getenv("PASSWORD") -}) - -monitor = TelegramChannelMonitor( - session_name='session_trueosint', - api_id=os.getenv("TELETHON_API_ID"), - api_hash=os.getenv("TELETHON_API_HASH"), - channel_username='trueosint', - source_name='trueosint' -) - def main(): - scheduler = AsyncIOScheduler() - scheduler.add_job(monitor.fetch_last_post, "cron", hour=9, minute=0, timezone=timezone("Europe/Moscow")) - scheduler.start() + TelegramChannelMonitor.set_db_config({ + 'host': os.getenv("HOST"), + 'port': os.getenv("PORT"), + 'database': os.getenv("DBNAME"), + 'user': os.getenv("USER"), + 'password': os.getenv("PASSWORD") + }) - asyncio.get_event_loop().run_forever() + monitor = TelegramChannelMonitor( + session_name='session_trueosint', + api_id=os.getenv("TELETHON_API_ID"), + api_hash=os.getenv("TELETHON_API_HASH"), + channel_username='trueosint', + source_name='trueosint' + ) + + scheduler = BackgroundScheduler() + scheduler.add_job( + monitor.fetch_last_post, + 'cron', + hour=9, + minute=0, + timezone=timezone("Europe/Moscow") + ) + + try: + scheduler.start() + logging.info("Scheduler started successfully") + while True: + pass + except (KeyboardInterrupt, SystemExit): + scheduler.shutdown() + logging.info("Scheduler shut down successfully") if __name__ == '__main__': main() diff --git a/tg/tg_node_1.py b/tg/tg_node_1.py index 8d8b8ec..1d813d1 100644 --- a/tg/tg_node_1.py +++ b/tg/tg_node_1.py @@ -1,34 +1,48 @@ import asyncio import os -from apscheduler.schedulers.asyncio import AsyncIOScheduler +from apscheduler.schedulers.background import BackgroundScheduler from pytz import timezone from tg_crawler import TelegramChannelMonitor from dotenv import load_dotenv +import logging + load_dotenv() -TelegramChannelMonitor.set_db_config({ - 'host': os.getenv("HOST"), - 'port': os.getenv("PORT"), - 'database': os.getenv("DBNAME"), - 'user': os.getenv("USER"), - 'password': os.getenv("PASSWORD") -}) - -monitor = TelegramChannelMonitor( - session_name='session_dataleak', - api_id=os.getenv("TELETHON_API_ID"), - api_hash=os.getenv("TELETHON_API_HASH"), - channel_username='dataleak', - source_name='dataleak' -) - def main(): - scheduler = AsyncIOScheduler() - scheduler.add_job(monitor.fetch_last_post, "cron", hour=9, minute=0, timezone=timezone("Europe/Moscow")) - scheduler.start() + TelegramChannelMonitor.set_db_config({ + 'host': os.getenv("HOST"), + 'port': os.getenv("PORT"), + 'database': os.getenv("DBNAME"), + 'user': os.getenv("USER"), + 'password': os.getenv("PASSWORD") + }) - asyncio.get_event_loop().run_forever() + monitor = TelegramChannelMonitor( + session_name='session_trueosint', + api_id=os.getenv("TELETHON_API_ID"), + api_hash=os.getenv("TELETHON_API_HASH"), + channel_username='dataleak', + source_name='dataleak' + ) + + scheduler = BackgroundScheduler() + scheduler.add_job( + monitor.fetch_last_post, + 'cron', + hour=9, + minute=0, + timezone=timezone("Europe/Moscow") + ) + + try: + scheduler.start() + logging.info("Scheduler started successfully") + while True: + pass + except (KeyboardInterrupt, SystemExit): + scheduler.shutdown() + logging.info("Scheduler shut down successfully") if __name__ == '__main__': main() diff --git a/web/app.py b/web/app.py index f4f8cec..8d64821 100644 --- a/web/app.py +++ b/web/app.py @@ -1,9 +1,13 @@ import os from flask import Flask, render_template import psycopg2 +from datetime import datetime +import docker app = Flask(__name__) +docker_client = docker.from_env() + DB_CONFIG = { 'host': os.getenv("HOST"), 'port': os.getenv("PORT"), @@ -12,23 +16,124 @@ DB_CONFIG = { 'password': os.getenv("PASSWORD") } +def parse_log_line(line): + """Парсит строку лога в формате: [timestamp] сообщение""" + try: + line = line.strip() + if line.startswith('[') and ']' in line: + return line + return f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] {line}" + except: + return line + +def get_parser_status(): + """Получает статус парсеров из Docker""" + try: + containers = docker_client.containers.list(all=True) + status = { + 'tg-node-0': False, + 'tg-node-1': False, + 'total': 0, + 'active': 0, + 'errors': 0 + } + + for container in containers: + if container.name == 'tg-node-0': + status['tg-node-0'] = container.status == 'running' + elif container.name == 'tg-node-1': + status['tg-node-1'] = container.status == 'running' + + status['active'] = sum([status['tg-node-0'], status['tg-node-1']]) + status['errors'] = 2 - status['active'] # Просто для примера + status['total'] = 2 + + return status + except Exception as e: + print(f"Error getting docker status: {e}") + return None + +def get_leaks_stats(): + """Получает статистику по утечкам из базы данных""" + try: + conn = psycopg2.connect(**DB_CONFIG) + cursor = conn.cursor() + + cursor.execute("SELECT COUNT(*) FROM leaks") + total_leaks = cursor.fetchone()[0] + + cursor.execute(""" + SELECT DATE(created_at) as day, COUNT(*) + FROM leaks + WHERE created_at >= CURRENT_DATE - INTERVAL '30 days' + GROUP BY day + ORDER BY day + """) + leaks_by_day = cursor.fetchall() + + cursor.execute(""" + SELECT resource_name, COUNT(*) + FROM leaks + GROUP BY resource_name + ORDER BY COUNT(*) DESC + LIMIT 3 + """) + leaks_by_source = cursor.fetchall() + + cursor.execute(""" + SELECT resource_name, message, created_at + FROM leaks + ORDER BY created_at DESC + LIMIT 10 + """) + recent_leaks = cursor.fetchall() + + return { + 'total_leaks': total_leaks, + 'leaks_by_day': leaks_by_day, + 'leaks_by_source': leaks_by_source, + 'recent_leaks': recent_leaks + } + except Exception as e: + print(f"Database error: {e}") + return None + finally: + if 'conn' in locals(): + conn.close() + @app.route("/") def index(): - with psycopg2.connect(**DB_CONFIG) as conn: - with conn.cursor() as cur: - cur.execute("SELECT source, message, created_at FROM leaks ORDER BY created_at DESC LIMIT 50") - leaks = cur.fetchall() - return render_template("index.html", leaks=leaks) + parser_status = get_parser_status() + leaks_stats = get_leaks_stats() + + if not leaks_stats: + leaks_stats = { + 'total_leaks': 150, + 'leaks_by_day': [(datetime.now().date(), 5)], + 'leaks_by_source': [('Telegram', 80), ('Форум', 50), ('Даркнет', 20)], + 'recent_leaks': [('Telegram', 'Новая утечка данных', datetime.now())] + } + + return render_template( + "index.html", + parser_status=parser_status, + leaks_stats=leaks_stats + ) @app.route("/logs") def logs(): - log_path = os.path.join(os.path.dirname(__file__), '..', 'tg_nodes.log') + log_path = '/app/tg_nodes.log' try: with open(log_path, 'r', encoding='utf-8') as f: lines = f.readlines()[-100:] + parsed_logs = [parse_log_line(line) for line in lines if line.strip()] except FileNotFoundError: - lines = ["Лог-файл не найден"] - return render_template("logs.html", logs=lines) + parsed_logs = ["[ERROR] Лог-файл не найден"] + except Exception as e: + parsed_logs = [f"[ERROR] Ошибка чтения лог-файла: {str(e)}"] + + return render_template("logs.html", logs=parsed_logs) + if __name__ == "__main__": - app.run(debug=True) + app.run(host='0.0.0.0', port=5000) diff --git a/web/templates/index.html b/web/templates/index.html index 6fdcb7a..6108875 100644 --- a/web/templates/index.html +++ b/web/templates/index.html @@ -1,5 +1,6 @@ + @@ -17,6 +18,7 @@ min-height: 100vh; overflow: hidden; } + .sidebar { width: 250px; background-color: #2e2e2e; @@ -30,13 +32,16 @@ bottom: 0; z-index: 1; } + .sidebar.collapsed { width: 60px; } + .sidebar.collapsed h2, .sidebar.collapsed ul li a span { display: none; } + .sidebar h2 { color: #3399FF; text-align: center; @@ -44,13 +49,16 @@ font-size: 24px; transition: opacity 0.3s ease; } + .sidebar ul { list-style: none; padding: 0; } + .sidebar ul li { margin: 15px 0; } + .sidebar ul li a { color: #ffffff; text-decoration: none; @@ -59,16 +67,20 @@ display: flex; align-items: center; } + .sidebar ul li a:hover { color: #3399FF; } + .sidebar ul li a i { margin-right: 10px; font-size: 20px; } + .sidebar ul li a span { transition: opacity 0.3s ease; } + .toggle-btn { position: fixed; left: 10px; @@ -81,6 +93,7 @@ cursor: pointer; z-index: 1000; } + .container { flex: 1; padding: 20px; @@ -91,42 +104,50 @@ position: relative; z-index: 0; } + .container.collapsed { margin-left: 60px; } + h1 { color: #3399FF; text-align: center; margin-bottom: 20px; } + .grid { display: grid; grid-template-columns: repeat(2, 1fr); gap: 20px; margin-top: 20px; } + .card { background-color: #2e2e2e; padding: 20px; border-radius: 8px; box-shadow: 0 0 5px rgba(0, 0, 0, 0.2); } + #map { height: 400px; border-radius: 8px; grid-column: span 2; } + .screenshots { display: flex; overflow-x: auto; gap: 10px; padding: 10px; } + .screenshots img { max-height: 200px; border-radius: 5px; border: 2px solid #3399FF; } + .logs { background-color: #2e2e2e; padding: 15px; @@ -137,10 +158,11 @@ font-size: 14px; color: #3399FF; } + .logs p { margin: 5px 0; } - + .leaflet-marker-icon { background-color: #3399FF; border: 2px solid #ffffff; @@ -149,10 +171,12 @@ height: 20px !important; box-shadow: 0 0 10px rgba(51, 153, 255, 0.5); } + .leaflet-popup-content { color: #1e1e1e; font-size: 14px; } + .leaflet-popup-content-wrapper { border-radius: 8px; box-shadow: 0 0 10px rgba(0, 0, 0, 0.3); @@ -160,6 +184,7 @@ +

Состояние парсеров

- +
+

TG Node 0: + + {% if parser_status['tg-node-0'] %}Активен{% else %}Ошибка{% endif %} + +

+

TG Node 1: + + {% if parser_status['tg-node-1'] %}Активен{% else %}Ошибка{% endif %} + +

+ +

Источники утечек

@@ -229,19 +266,26 @@ + \ No newline at end of file diff --git a/web/templates/logs.html b/web/templates/logs.html index a9eb2eb..57c6e1e 100644 --- a/web/templates/logs.html +++ b/web/templates/logs.html @@ -218,15 +218,13 @@
-

Логи системы

-
-

[2023-10-01 12:34] Парсер форума запущен.

-

[2023-10-01 12:35] Найдена новая утечка на форуме X.

-

[2023-10-01 12:36] Ошибка парсера Telegram: timeout.

-

[2023-10-01 12:37] Утечка данных в Москве зафиксирована.

-

[2023-10-01 12:38] Парсер даркнета завершил работу.

-

[2023-10-01 12:39] Новый скриншот добавлен в базу.

- +

Логи системы (tg_nodes.log)

+
+ {% for log in logs %} +

{{ log }}

+ {% else %} +

Нет записей в логах

+ {% endfor %}