fix: logging

This commit is contained in:
Глеб Новицкий 2025-04-13 21:15:50 +03:00
parent 9c80afa351
commit 166ad0ba2f
8 changed files with 297 additions and 86 deletions

View File

@ -7,21 +7,41 @@ services:
ports: ports:
- "5000:5000" - "5000:5000"
volumes: volumes:
- .:/app - ./web:/app/web
- /var/run/docker.sock:/var/run/docker.sock
- type: bind
source: ./tg_nodes.log
target: /app/tg_nodes.log
read_only: true
environment:
- PYTHONUNBUFFERED=1
privileged: true
restart: unless-stopped
command: python web/app.py command: python web/app.py
depends_on:
- tg_node_0
- tg_node_1
tg_node_0: tg_node_0:
build: . build: .
volumes:
- type: bind
source: ./tg_nodes.log
target: /app/tg_nodes.log
environment:
- PYTHONUNBUFFERED=1
container_name: tg-node-0 container_name: tg-node-0
command: python tg/node_tg_0.py restart: unless-stopped
command: python tg/tg_node_0.py
tg_node_1: tg_node_1:
build: . build: .
volumes:
- type: bind
source: ./tg_nodes.log
target: /app/tg_nodes.log
environment:
- PYTHONUNBUFFERED=1
container_name: tg-node-1 container_name: tg-node-1
command: python tg/node_tg_1.py restart: unless-stopped
command: python tg/tg_node_1.py
volumes: volumes:
pg_data: pg_data:

View File

@ -7,4 +7,5 @@ selenium
grpc_interceptor_headers grpc_interceptor_headers
telethon telethon
schedule schedule
psycopg2-binary psycopg2-binary
docker

View File

@ -1,14 +1,18 @@
import logging import logging
import os
import psycopg2 import psycopg2
from datetime import datetime from datetime import datetime
from telethon.sync import TelegramClient from telethon.sync import TelegramClient
from telethon.tl.functions.messages import GetHistoryRequest from telethon.tl.functions.messages import GetHistoryRequest
LOG_FILE = os.getenv('LOG_FILE', '/app/tg_nodes.log')
if os.path.exists(LOG_FILE) and os.path.isdir(LOG_FILE):
raise RuntimeError(f"Path {LOG_FILE} is a directory! Expected file.")
logging.basicConfig( logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s',
handlers=[ handlers=[
logging.FileHandler('tg_nodes.log'), logging.FileHandler(LOG_FILE),
logging.StreamHandler() logging.StreamHandler()
] ]
) )

View File

@ -1,34 +1,47 @@
import asyncio import asyncio
import os import os
from apscheduler.schedulers.asyncio import AsyncIOScheduler from apscheduler.schedulers.background import BackgroundScheduler
from pytz import timezone from pytz import timezone
from tg_crawler import TelegramChannelMonitor from tg_crawler import TelegramChannelMonitor
from dotenv import load_dotenv from dotenv import load_dotenv
import logging
load_dotenv() load_dotenv()
TelegramChannelMonitor.set_db_config({
'host': os.getenv("HOST"),
'port': os.getenv("PORT"),
'database': os.getenv("DBNAME"),
'user': os.getenv("USER"),
'password': os.getenv("PASSWORD")
})
monitor = TelegramChannelMonitor(
session_name='session_trueosint',
api_id=os.getenv("TELETHON_API_ID"),
api_hash=os.getenv("TELETHON_API_HASH"),
channel_username='trueosint',
source_name='trueosint'
)
def main():
    """Configure the crawler, schedule the daily fetch and keep the process alive.

    Hands the database settings read from the environment to
    ``TelegramChannelMonitor``, builds a monitor for the ``trueosint``
    channel and registers ``monitor.fetch_last_post`` as a cron job at
    09:00 Europe/Moscow. The call then blocks until ``KeyboardInterrupt``
    or ``SystemExit`` is raised, at which point the scheduler is shut down.
    """
    # Function-scope import: only the keep-alive loop below needs it.
    import time

    TelegramChannelMonitor.set_db_config({
        'host': os.getenv("HOST"),
        'port': os.getenv("PORT"),
        'database': os.getenv("DBNAME"),
        'user': os.getenv("USER"),
        'password': os.getenv("PASSWORD")
    })

    monitor = TelegramChannelMonitor(
        session_name='session_trueosint',
        api_id=os.getenv("TELETHON_API_ID"),
        api_hash=os.getenv("TELETHON_API_HASH"),
        channel_username='trueosint',
        source_name='trueosint'
    )

    # NOTE(review): BackgroundScheduler runs jobs in worker threads. If
    # fetch_last_post is a coroutine function it would not be awaited
    # there -- confirm against tg_crawler.
    scheduler = BackgroundScheduler()
    scheduler.add_job(
        monitor.fetch_last_post,
        'cron',
        hour=9,
        minute=0,
        timezone=timezone("Europe/Moscow")
    )

    try:
        scheduler.start()
        logging.info("Scheduler started successfully")
        # Sleep rather than spin: a bare `while True: pass` keeps one CPU
        # core fully busy and competes with the scheduler's own threads.
        while True:
            time.sleep(1)
    except (KeyboardInterrupt, SystemExit):
        scheduler.shutdown()
        logging.info("Scheduler shut down successfully")
if __name__ == '__main__': if __name__ == '__main__':
main() main()

View File

@ -1,34 +1,48 @@
import asyncio import asyncio
import os import os
from apscheduler.schedulers.asyncio import AsyncIOScheduler from apscheduler.schedulers.background import BackgroundScheduler
from pytz import timezone from pytz import timezone
from tg_crawler import TelegramChannelMonitor from tg_crawler import TelegramChannelMonitor
from dotenv import load_dotenv from dotenv import load_dotenv
import logging
load_dotenv() load_dotenv()
TelegramChannelMonitor.set_db_config({
'host': os.getenv("HOST"),
'port': os.getenv("PORT"),
'database': os.getenv("DBNAME"),
'user': os.getenv("USER"),
'password': os.getenv("PASSWORD")
})
monitor = TelegramChannelMonitor(
session_name='session_dataleak',
api_id=os.getenv("TELETHON_API_ID"),
api_hash=os.getenv("TELETHON_API_HASH"),
channel_username='dataleak',
source_name='dataleak'
)
def main():
    """Configure the crawler, schedule the daily fetch and keep the process alive.

    Hands the database settings read from the environment to
    ``TelegramChannelMonitor``, builds a monitor for the ``dataleak``
    channel and registers ``monitor.fetch_last_post`` as a cron job at
    09:00 Europe/Moscow. The call then blocks until ``KeyboardInterrupt``
    or ``SystemExit`` is raised, at which point the scheduler is shut down.
    """
    # Function-scope import: only the keep-alive loop below needs it.
    import time

    TelegramChannelMonitor.set_db_config({
        'host': os.getenv("HOST"),
        'port': os.getenv("PORT"),
        'database': os.getenv("DBNAME"),
        'user': os.getenv("USER"),
        'password': os.getenv("PASSWORD")
    })

    # NOTE(review): the session name is 'session_trueosint' although this
    # node watches 'dataleak' (it used to be 'session_dataleak'). This looks
    # like a copy-paste from the other node -- confirm that sharing one
    # Telethon session between both nodes is intended.
    monitor = TelegramChannelMonitor(
        session_name='session_trueosint',
        api_id=os.getenv("TELETHON_API_ID"),
        api_hash=os.getenv("TELETHON_API_HASH"),
        channel_username='dataleak',
        source_name='dataleak'
    )

    # NOTE(review): BackgroundScheduler runs jobs in worker threads. If
    # fetch_last_post is a coroutine function it would not be awaited
    # there -- confirm against tg_crawler.
    scheduler = BackgroundScheduler()
    scheduler.add_job(
        monitor.fetch_last_post,
        'cron',
        hour=9,
        minute=0,
        timezone=timezone("Europe/Moscow")
    )

    try:
        scheduler.start()
        logging.info("Scheduler started successfully")
        # Sleep rather than spin: a bare `while True: pass` keeps one CPU
        # core fully busy and competes with the scheduler's own threads.
        while True:
            time.sleep(1)
    except (KeyboardInterrupt, SystemExit):
        scheduler.shutdown()
        logging.info("Scheduler shut down successfully")
if __name__ == '__main__': if __name__ == '__main__':
main() main()

View File

@ -1,9 +1,13 @@
import os import os
from flask import Flask, render_template from flask import Flask, render_template
import psycopg2 import psycopg2
from datetime import datetime
import docker
app = Flask(__name__) app = Flask(__name__)
docker_client = docker.from_env()
DB_CONFIG = { DB_CONFIG = {
'host': os.getenv("HOST"), 'host': os.getenv("HOST"),
'port': os.getenv("PORT"), 'port': os.getenv("PORT"),
@ -12,23 +16,124 @@ DB_CONFIG = {
'password': os.getenv("PASSWORD") 'password': os.getenv("PASSWORD")
} }
def _starts_with_timestamp(text):
    """Return True when *text* begins with a ``YYYY-MM-DD HH:MM:SS`` stamp."""
    try:
        datetime.strptime(text[:19], '%Y-%m-%d %H:%M:%S')
    except ValueError:
        return False
    return True


def parse_log_line(line):
    """Normalise one raw log line for display.

    The line is stripped of surrounding whitespace. It is returned as-is
    when it already carries its own time information: either a leading
    ``[...]`` prefix, or a leading ``YYYY-MM-DD HH:MM:SS`` stamp (the shape
    ``logging``'s default ``%(asctime)s`` produces). Any other line is
    prefixed with the current local time in square brackets.

    Non-string input is returned unchanged.
    """
    if not isinstance(line, str):
        return line

    text = line.strip()
    if text.startswith('[') and ']' in text:
        return text
    # Do not stamp "now" onto a record that already states when it was
    # written; that would show a misleading time next to the real one.
    if _starts_with_timestamp(text):
        return text
    return f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] {text}"
def get_parser_status():
    """Report whether the two Telegram parser containers are running.

    Lists every container known to ``docker_client`` (stopped ones
    included) and checks the ones named ``tg-node-0`` and ``tg-node-1``.

    Returns a dict with a boolean per node plus ``total``, ``active`` and
    ``errors`` counters, or ``None`` if anything raises along the way.
    """
    try:
        node_0_up = False
        node_1_up = False
        for item in docker_client.containers.list(all=True):
            if item.name == 'tg-node-0':
                node_0_up = item.status == 'running'
            elif item.name == 'tg-node-1':
                node_1_up = item.status == 'running'

        running = sum((node_0_up, node_1_up))
        return {
            'tg-node-0': node_0_up,
            'tg-node-1': node_1_up,
            'total': 2,
            'active': running,
            # Placeholder logic: every node that is not running counts as an error.
            'errors': 2 - running,
        }
    except Exception as e:
        print(f"Error getting docker status: {e}")
        return None
def get_leaks_stats():
    """Collect leak statistics from the ``leaks`` table.

    Runs four queries: the total row count, per-day counts for the last
    30 days, the three ``resource_name`` values with the most rows, and
    the ten most recent rows.

    Returns a dict with the keys ``total_leaks``, ``leaks_by_day``,
    ``leaks_by_source`` and ``recent_leaks``, or ``None`` if connecting or
    querying raises. The connection is always closed before returning.
    """
    # Bound up front so the cleanup in `finally` can tell whether the
    # connection was ever opened.
    conn = None
    try:
        conn = psycopg2.connect(**DB_CONFIG)
        # The context manager closes the cursor even if a query fails.
        with conn.cursor() as cursor:
            cursor.execute("SELECT COUNT(*) FROM leaks")
            total_leaks = cursor.fetchone()[0]

            cursor.execute("""
                SELECT DATE(created_at) as day, COUNT(*)
                FROM leaks
                WHERE created_at >= CURRENT_DATE - INTERVAL '30 days'
                GROUP BY day
                ORDER BY day
            """)
            leaks_by_day = cursor.fetchall()

            cursor.execute("""
                SELECT resource_name, COUNT(*)
                FROM leaks
                GROUP BY resource_name
                ORDER BY COUNT(*) DESC
                LIMIT 3
            """)
            leaks_by_source = cursor.fetchall()

            cursor.execute("""
                SELECT resource_name, message, created_at
                FROM leaks
                ORDER BY created_at DESC
                LIMIT 10
            """)
            recent_leaks = cursor.fetchall()

        return {
            'total_leaks': total_leaks,
            'leaks_by_day': leaks_by_day,
            'leaks_by_source': leaks_by_source,
            'recent_leaks': recent_leaks
        }
    except Exception as e:
        print(f"Database error: {e}")
        return None
    finally:
        if conn is not None:
            conn.close()
@app.route("/")
def index():
    """Render the dashboard with parser status and leak statistics.

    Both data sources may return ``None`` on failure. In that case a
    fallback value is substituted so ``index.html`` always receives dicts
    with the keys it reads.
    """
    parser_status = get_parser_status()
    if parser_status is None:
        # The template interpolates parser_status['active'] and
        # parser_status['errors'] straight into a JavaScript array, so it
        # needs real numbers. Report both nodes as down when Docker could
        # not be queried.
        parser_status = {
            'tg-node-0': False,
            'tg-node-1': False,
            'total': 2,
            'active': 0,
            'errors': 2
        }

    leaks_stats = get_leaks_stats()
    if not leaks_stats:
        # NOTE(review): these are made-up placeholder figures and are
        # displayed as if they were real when the database is unreachable
        # -- confirm this is wanted outside of demos.
        leaks_stats = {
            'total_leaks': 150,
            'leaks_by_day': [(datetime.now().date(), 5)],
            'leaks_by_source': [('Telegram', 80), ('Форум', 50), ('Даркнет', 20)],
            'recent_leaks': [('Telegram', 'Новая утечка данных', datetime.now())]
        }

    return render_template(
        "index.html",
        parser_status=parser_status,
        leaks_stats=leaks_stats
    )
@app.route("/logs")
def logs():
    """Render the last 100 lines of ``/app/tg_nodes.log``.

    Blank lines among those 100 are dropped and the rest are passed
    through ``parse_log_line``. If the file is missing or cannot be read,
    a single ``[ERROR] ...`` entry is rendered instead.
    """
    # Function-scope import: only this view needs it.
    from collections import deque

    log_path = '/app/tg_nodes.log'
    try:
        # errors='replace' keeps the page usable when the log contains
        # bytes that are not valid UTF-8.
        with open(log_path, 'r', encoding='utf-8', errors='replace') as f:
            # A bounded deque keeps only the tail while streaming, instead
            # of loading the whole (ever-growing) log file into memory.
            lines = deque(f, maxlen=100)
        parsed_logs = [parse_log_line(line) for line in lines if line.strip()]
    except FileNotFoundError:
        parsed_logs = ["[ERROR] Лог-файл не найден"]
    except Exception as e:
        parsed_logs = [f"[ERROR] Ошибка чтения лог-файла: {str(e)}"]

    return render_template("logs.html", logs=parsed_logs)
if __name__ == "__main__": if __name__ == "__main__":
app.run(debug=True) app.run(host='0.0.0.0', port=5000)

View File

@ -1,5 +1,6 @@
<!DOCTYPE html> <!DOCTYPE html>
<html lang="ru"> <html lang="ru">
<head> <head>
<meta charset="UTF-8"> <meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0"> <meta name="viewport" content="width=device-width, initial-scale=1.0">
@ -17,6 +18,7 @@
min-height: 100vh; min-height: 100vh;
overflow: hidden; overflow: hidden;
} }
.sidebar { .sidebar {
width: 250px; width: 250px;
background-color: #2e2e2e; background-color: #2e2e2e;
@ -30,13 +32,16 @@
bottom: 0; bottom: 0;
z-index: 1; z-index: 1;
} }
.sidebar.collapsed { .sidebar.collapsed {
width: 60px; width: 60px;
} }
.sidebar.collapsed h2, .sidebar.collapsed h2,
.sidebar.collapsed ul li a span { .sidebar.collapsed ul li a span {
display: none; display: none;
} }
.sidebar h2 { .sidebar h2 {
color: #3399FF; color: #3399FF;
text-align: center; text-align: center;
@ -44,13 +49,16 @@
font-size: 24px; font-size: 24px;
transition: opacity 0.3s ease; transition: opacity 0.3s ease;
} }
.sidebar ul { .sidebar ul {
list-style: none; list-style: none;
padding: 0; padding: 0;
} }
.sidebar ul li { .sidebar ul li {
margin: 15px 0; margin: 15px 0;
} }
.sidebar ul li a { .sidebar ul li a {
color: #ffffff; color: #ffffff;
text-decoration: none; text-decoration: none;
@ -59,16 +67,20 @@
display: flex; display: flex;
align-items: center; align-items: center;
} }
.sidebar ul li a:hover { .sidebar ul li a:hover {
color: #3399FF; color: #3399FF;
} }
.sidebar ul li a i { .sidebar ul li a i {
margin-right: 10px; margin-right: 10px;
font-size: 20px; font-size: 20px;
} }
.sidebar ul li a span { .sidebar ul li a span {
transition: opacity 0.3s ease; transition: opacity 0.3s ease;
} }
.toggle-btn { .toggle-btn {
position: fixed; position: fixed;
left: 10px; left: 10px;
@ -81,6 +93,7 @@
cursor: pointer; cursor: pointer;
z-index: 1000; z-index: 1000;
} }
.container { .container {
flex: 1; flex: 1;
padding: 20px; padding: 20px;
@ -91,42 +104,50 @@
position: relative; position: relative;
z-index: 0; z-index: 0;
} }
.container.collapsed { .container.collapsed {
margin-left: 60px; margin-left: 60px;
} }
h1 { h1 {
color: #3399FF; color: #3399FF;
text-align: center; text-align: center;
margin-bottom: 20px; margin-bottom: 20px;
} }
.grid { .grid {
display: grid; display: grid;
grid-template-columns: repeat(2, 1fr); grid-template-columns: repeat(2, 1fr);
gap: 20px; gap: 20px;
margin-top: 20px; margin-top: 20px;
} }
.card { .card {
background-color: #2e2e2e; background-color: #2e2e2e;
padding: 20px; padding: 20px;
border-radius: 8px; border-radius: 8px;
box-shadow: 0 0 5px rgba(0, 0, 0, 0.2); box-shadow: 0 0 5px rgba(0, 0, 0, 0.2);
} }
#map { #map {
height: 400px; height: 400px;
border-radius: 8px; border-radius: 8px;
grid-column: span 2; grid-column: span 2;
} }
.screenshots { .screenshots {
display: flex; display: flex;
overflow-x: auto; overflow-x: auto;
gap: 10px; gap: 10px;
padding: 10px; padding: 10px;
} }
.screenshots img { .screenshots img {
max-height: 200px; max-height: 200px;
border-radius: 5px; border-radius: 5px;
border: 2px solid #3399FF; border: 2px solid #3399FF;
} }
.logs { .logs {
background-color: #2e2e2e; background-color: #2e2e2e;
padding: 15px; padding: 15px;
@ -137,10 +158,11 @@
font-size: 14px; font-size: 14px;
color: #3399FF; color: #3399FF;
} }
.logs p { .logs p {
margin: 5px 0; margin: 5px 0;
} }
.leaflet-marker-icon { .leaflet-marker-icon {
background-color: #3399FF; background-color: #3399FF;
border: 2px solid #ffffff; border: 2px solid #ffffff;
@ -149,10 +171,12 @@
height: 20px !important; height: 20px !important;
box-shadow: 0 0 10px rgba(51, 153, 255, 0.5); box-shadow: 0 0 10px rgba(51, 153, 255, 0.5);
} }
.leaflet-popup-content { .leaflet-popup-content {
color: #1e1e1e; color: #1e1e1e;
font-size: 14px; font-size: 14px;
} }
.leaflet-popup-content-wrapper { .leaflet-popup-content-wrapper {
border-radius: 8px; border-radius: 8px;
box-shadow: 0 0 10px rgba(0, 0, 0, 0.3); box-shadow: 0 0 10px rgba(0, 0, 0, 0.3);
@ -160,6 +184,7 @@
</style> </style>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0-beta3/css/all.min.css"> <link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0-beta3/css/all.min.css">
</head> </head>
<body> <body>
<!-- Боковое меню --> <!-- Боковое меню -->
<div class="sidebar" id="sidebar"> <div class="sidebar" id="sidebar">
@ -196,7 +221,19 @@
</div> </div>
<div class="card"> <div class="card">
<h2>Состояние парсеров</h2> <h2>Состояние парсеров</h2>
<canvas id="parsersStatusChart"></canvas> <div class="parsers-status">
<p>TG Node 0:
<span class="status-{% if parser_status['tg-node-0'] %}active{% else %}error{% endif %}">
{% if parser_status['tg-node-0'] %}Активен{% else %}Ошибка{% endif %}
</span>
</p>
<p>TG Node 1:
<span class="status-{% if parser_status['tg-node-1'] %}active{% else %}error{% endif %}">
{% if parser_status['tg-node-1'] %}Активен{% else %}Ошибка{% endif %}
</span>
</p>
<canvas id="parsersStatusChart"></canvas>
</div>
</div> </div>
<div class="card"> <div class="card">
<h2>Источники утечек</h2> <h2>Источники утечек</h2>
@ -229,19 +266,26 @@
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script> <script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
<script src="https://unpkg.com/leaflet@1.7.1/dist/leaflet.js"></script> <script src="https://unpkg.com/leaflet@1.7.1/dist/leaflet.js"></script>
<script> <script>
// График утечек за месяц
const monthlyLeaksCtx = document.getElementById('monthlyLeaksChart').getContext('2d'); const monthlyLeaksCtx = document.getElementById('monthlyLeaksChart').getContext('2d');
new Chart(monthlyLeaksCtx, { new Chart(monthlyLeaksCtx, {
type: 'line', type: 'line',
data: { data: {
labels: ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15'], labels: [
{% for day in leaks_stats['leaks_by_day'] %}
"{{ day[0].strftime('%d.%m') }}"{% if not loop.last %}, {% endif %}
{% endfor %}
],
datasets: [{ datasets: [{
label: 'Утечки', label: 'Утечки',
data: [12, 19, 3, 5, 2, 3, 15, 20, 10, 8, 12, 14, 18, 20, 22], data: [
{% for day in leaks_stats['leaks_by_day'] %}
{{ day[1] }}{% if not loop.last %}, {% endif %}
{% endfor %}
],
borderColor: '#3399FF', borderColor: '#3399FF',
tension: 0.1, tension: 0.1,
fill: true, fill: true,
backgroundColor: 'rgba(51, 153, 255, 0.1)', backgroundColor: 'rgba(51, 153, 255, 0.1)'
}] }]
}, },
options: { options: {
@ -249,7 +293,7 @@
plugins: { plugins: {
legend: { legend: {
labels: { labels: {
color: '#fff', color: '#fff'
} }
} }
}, },
@ -257,24 +301,23 @@
y: { y: {
beginAtZero: true, beginAtZero: true,
grid: { grid: {
color: '#444', color: '#444'
}, },
ticks: { ticks: {
color: '#fff', color: '#fff'
} }
}, },
x: { x: {
grid: { grid: {
color: '#444', color: '#444'
}, },
ticks: { ticks: {
color: '#fff', color: '#fff'
} }
} }
} }
} }
}); });
// График состояния парсеров // График состояния парсеров
const parsersStatusCtx = document.getElementById('parsersStatusChart').getContext('2d'); const parsersStatusCtx = document.getElementById('parsersStatusChart').getContext('2d');
new Chart(parsersStatusCtx, { new Chart(parsersStatusCtx, {
@ -283,7 +326,11 @@
labels: ['Активны', 'Ошибки', 'Неактивны'], labels: ['Активны', 'Ошибки', 'Неактивны'],
datasets: [{ datasets: [{
label: 'Состояние', label: 'Состояние',
data: [8, 1, 1], data: [
{{ parser_status['active'] }},
{{ parser_status['errors'] }},
0 // Неактивных нет в текущей логике
],
backgroundColor: ['#3399FF', '#ff4444', '#666666'], backgroundColor: ['#3399FF', '#ff4444', '#666666'],
}] }]
}, },
@ -304,10 +351,18 @@
new Chart(sourcesCtx, { new Chart(sourcesCtx, {
type: 'bar', type: 'bar',
data: { data: {
labels: ['Форумы', 'Даркнет', 'Telegram'], labels: [
{% for source in leaks_stats['leaks_by_source'] %}
'{{ source[0] }}'{% if not loop.last %},{% endif %}
{% endfor %}
],
datasets: [{ datasets: [{
label: 'Количество утечек', label: 'Количество утечек',
data: [45, 30, 25], data: [
{% for source in leaks_stats['leaks_by_source'] %}
{{ source[1] }}{% if not loop.last %},{% endif %}
{% endfor %}
],
backgroundColor: ['#3399FF', '#00cc99', '#0099cc'], backgroundColor: ['#3399FF', '#00cc99', '#0099cc'],
}] }]
}, },
@ -365,4 +420,5 @@
}); });
</script> </script>
</body> </body>
</html> </html>

View File

@ -218,15 +218,13 @@
<!-- Контейнер для логов --> <!-- Контейнер для логов -->
<div class="logs-container"> <div class="logs-container">
<h2>Логи системы</h2> <h2>Логи системы (tg_nodes.log)</h2>
<div class="logs"> <div class="logs" id="logs-container">
<p>[2023-10-01 12:34] Парсер форума запущен.</p> {% for log in logs %}
<p>[2023-10-01 12:35] Найдена новая утечка на форуме X.</p> <p>{{ log }}</p>
<p>[2023-10-01 12:36] Ошибка парсера Telegram: timeout.</p> {% else %}
<p>[2023-10-01 12:37] Утечка данных в Москве зафиксирована.</p> <p>Нет записей в логах</p>
<p>[2023-10-01 12:38] Парсер даркнета завершил работу.</p> {% endfor %}
<p>[2023-10-01 12:39] Новый скриншот добавлен в базу.</p>
<!-- Добавьте больше логов по необходимости -->
</div> </div>
</div> </div>
</div> </div>