fix: logging

This commit is contained in:
Глеб Новицкий 2025-04-13 21:15:50 +03:00
parent 9c80afa351
commit 166ad0ba2f
8 changed files with 297 additions and 86 deletions

View File

@ -7,21 +7,41 @@ services:
ports:
- "5000:5000"
volumes:
- .:/app
- ./web:/app/web
- /var/run/docker.sock:/var/run/docker.sock
- type: bind
source: ./tg_nodes.log
target: /app/tg_nodes.log
read_only: true
environment:
- PYTHONUNBUFFERED=1
privileged: true
restart: unless-stopped
command: python web/app.py
depends_on:
- tg_node_0
- tg_node_1
tg_node_0:
  build: .
  # Bind-mount the host log file so this node appends to the same
  # tg_nodes.log that is mounted at /app/tg_nodes.log.
  volumes:
    - type: bind
      source: ./tg_nodes.log
      target: /app/tg_nodes.log
  environment:
    # Unbuffered stdout/stderr so output shows up immediately in `docker logs`.
    - PYTHONUNBUFFERED=1
  container_name: tg-node-0
  restart: unless-stopped
  # Only one `command` key is allowed per service; the later (effective)
  # entry is the one kept here.
  command: python tg/tg_node_0.py
tg_node_1:
  build: .
  # Bind-mount the host log file so this node appends to the same
  # tg_nodes.log that is mounted at /app/tg_nodes.log.
  volumes:
    - type: bind
      source: ./tg_nodes.log
      target: /app/tg_nodes.log
  environment:
    # Unbuffered stdout/stderr so output shows up immediately in `docker logs`.
    - PYTHONUNBUFFERED=1
  container_name: tg-node-1
  restart: unless-stopped
  # Only one `command` key is allowed per service; the later (effective)
  # entry is the one kept here.
  command: python tg/tg_node_1.py
# Named volumes declared for the stack (each name may appear only once).
volumes:
  pg_data:

View File

@ -7,4 +7,5 @@ selenium
grpc_interceptor_headers
telethon
schedule
psycopg2-binary
psycopg2-binary
docker

View File

@ -1,14 +1,18 @@
import logging
import os
import psycopg2
from datetime import datetime
from telethon.sync import TelegramClient
from telethon.tl.functions.messages import GetHistoryRequest
# Path of the log file; overridable through the LOG_FILE environment variable.
LOG_FILE = os.getenv('LOG_FILE', '/app/tg_nodes.log')

# Fail fast with a readable message when the path is a directory:
# the FileHandler below needs a regular file. isdir() is already False for
# a missing path, so no separate exists() check is required.
if os.path.isdir(LOG_FILE):
    raise RuntimeError(f"Path {LOG_FILE} is a directory! Expected file.")

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s',
    handlers=[
        # Single file handler (a second one on the same file would write
        # every record twice). UTF-8 is set explicitly so non-ASCII
        # messages are stored independently of the container locale.
        logging.FileHandler(LOG_FILE, encoding='utf-8'),
        logging.StreamHandler()
    ]
)

View File

@ -1,34 +1,47 @@
import asyncio
import os
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.schedulers.background import BackgroundScheduler
from pytz import timezone
from tg_crawler import TelegramChannelMonitor
from dotenv import load_dotenv
import logging
load_dotenv()
def main():
    """Run the 'trueosint' channel monitor on a daily schedule until interrupted.

    Configures the database settings of TelegramChannelMonitor from the
    HOST / PORT / DBNAME / USER / PASSWORD environment variables, builds a
    monitor for the 'trueosint' channel and schedules
    ``monitor.fetch_last_post`` every day at 09:00 Europe/Moscow.
    The call blocks until KeyboardInterrupt or SystemExit, then shuts the
    scheduler down.
    """
    import time  # local import: only needed for the idle loop below

    TelegramChannelMonitor.set_db_config({
        'host': os.getenv("HOST"),
        'port': os.getenv("PORT"),
        'database': os.getenv("DBNAME"),
        'user': os.getenv("USER"),
        'password': os.getenv("PASSWORD")
    })

    monitor = TelegramChannelMonitor(
        session_name='session_trueosint',
        api_id=os.getenv("TELETHON_API_ID"),
        api_hash=os.getenv("TELETHON_API_HASH"),
        channel_username='trueosint',
        source_name='trueosint'
    )

    # NOTE(review): BackgroundScheduler invokes the job from a worker thread
    # and does not await coroutines - confirm fetch_last_post is a plain
    # (synchronous) callable.
    scheduler = BackgroundScheduler()
    scheduler.add_job(
        monitor.fetch_last_post,
        'cron',
        hour=9,
        minute=0,
        timezone=timezone("Europe/Moscow")
    )

    try:
        scheduler.start()
        logging.info("Scheduler started successfully")
        # The scheduler works in the background; the main thread only has to
        # stay alive. Sleep instead of spinning so it does not burn a CPU core.
        while True:
            time.sleep(1)
    except (KeyboardInterrupt, SystemExit):
        scheduler.shutdown()
        logging.info("Scheduler shut down successfully")


if __name__ == '__main__':
    main()

View File

@ -1,34 +1,48 @@
import asyncio
import os
from apscheduler.schedulers.asyncio import AsyncIOScheduler
from apscheduler.schedulers.background import BackgroundScheduler
from pytz import timezone
from tg_crawler import TelegramChannelMonitor
from dotenv import load_dotenv
import logging
load_dotenv()
def main():
    """Run the 'dataleak' channel monitor on a daily schedule until interrupted.

    Configures the database settings of TelegramChannelMonitor from the
    HOST / PORT / DBNAME / USER / PASSWORD environment variables, builds a
    monitor for the 'dataleak' channel and schedules
    ``monitor.fetch_last_post`` every day at 09:00 Europe/Moscow.
    The call blocks until KeyboardInterrupt or SystemExit, then shuts the
    scheduler down.
    """
    import time  # local import: only needed for the idle loop below

    TelegramChannelMonitor.set_db_config({
        'host': os.getenv("HOST"),
        'port': os.getenv("PORT"),
        'database': os.getenv("DBNAME"),
        'user': os.getenv("USER"),
        'password': os.getenv("PASSWORD")
    })

    monitor = TelegramChannelMonitor(
        # NOTE(review): the session name says 'trueosint' while the channel
        # and source are 'dataleak' - confirm that sharing this session name
        # is intentional and not a copy-paste slip.
        session_name='session_trueosint',
        api_id=os.getenv("TELETHON_API_ID"),
        api_hash=os.getenv("TELETHON_API_HASH"),
        channel_username='dataleak',
        source_name='dataleak'
    )

    # NOTE(review): BackgroundScheduler invokes the job from a worker thread
    # and does not await coroutines - confirm fetch_last_post is a plain
    # (synchronous) callable.
    scheduler = BackgroundScheduler()
    scheduler.add_job(
        monitor.fetch_last_post,
        'cron',
        hour=9,
        minute=0,
        timezone=timezone("Europe/Moscow")
    )

    try:
        scheduler.start()
        logging.info("Scheduler started successfully")
        # The scheduler works in the background; the main thread only has to
        # stay alive. Sleep instead of spinning so it does not burn a CPU core.
        while True:
            time.sleep(1)
    except (KeyboardInterrupt, SystemExit):
        scheduler.shutdown()
        logging.info("Scheduler shut down successfully")


if __name__ == '__main__':
    main()

View File

@ -1,9 +1,13 @@
import os
from flask import Flask, render_template
import psycopg2
from datetime import datetime
import docker
app = Flask(__name__)
docker_client = docker.from_env()
DB_CONFIG = {
'host': os.getenv("HOST"),
'port': os.getenv("PORT"),
@ -12,23 +16,124 @@ DB_CONFIG = {
'password': os.getenv("PASSWORD")
}
def parse_log_line(line):
    """Normalize one raw log line for display.

    The line is stripped of surrounding whitespace and then:

    * returned unchanged if it already looks like ``[timestamp] message``
      (starts with ``[`` and contains ``]``);
    * returned unchanged if it starts with a ``YYYY-MM-DD HH:MM:SS``
      timestamp, so a line that already carries its own time is not
      relabelled with the time it was read;
    * otherwise prefixed with the current local time in square brackets.

    A value that is not a string is returned untouched.
    """
    if not isinstance(line, str):
        # Best effort: hand back whatever was given instead of raising.
        return line

    line = line.strip()
    if line.startswith('[') and ']' in line:
        return line

    try:
        # The first 19 characters hold a 'YYYY-MM-DD HH:MM:SS' stamp, if any.
        datetime.strptime(line[:19], '%Y-%m-%d %H:%M:%S')
    except ValueError:
        return f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] {line}"
    return line
def get_parser_status():
    """Report which parser containers are running according to Docker.

    Returns:
        dict: boolean entries 'tg-node-0' and 'tg-node-1' (True when the
        container's status is 'running') plus integer counters 'total',
        'active' and 'errors', where 'errors' is every tracked container
        that is not running. None when querying Docker fails.
    """
    # Container names to track; the counters below are derived from this
    # tuple instead of a hard-coded 2.
    tracked = ('tg-node-0', 'tg-node-1')
    try:
        status = {name: False for name in tracked}
        # all=True also lists stopped containers.
        for container in docker_client.containers.list(all=True):
            if container.name in status:
                status[container.name] = container.status == 'running'

        active = sum(status[name] for name in tracked)
        status['total'] = len(tracked)
        status['active'] = active
        # Simplification: anything not running is counted as an error.
        status['errors'] = len(tracked) - active
        return status
    except Exception as e:
        print(f"Error getting docker status: {e}")
        return None
def get_leaks_stats():
    """Collect leak statistics from the ``leaks`` table.

    Returns:
        dict with the keys
        'total_leaks' (row count of the table),
        'leaks_by_day' ((day, count) rows for the last 30 days, oldest first),
        'leaks_by_source' (the three resource names with the most rows, as
        (resource_name, count)) and
        'recent_leaks' (the ten newest rows as
        (resource_name, message, created_at));
        None when connecting or querying fails.
    """
    conn = None
    try:
        conn = psycopg2.connect(**DB_CONFIG)
        # The cursor context manager closes the cursor on exit.
        with conn.cursor() as cursor:
            cursor.execute("SELECT COUNT(*) FROM leaks")
            total_leaks = cursor.fetchone()[0]

            cursor.execute("""
                SELECT DATE(created_at) as day, COUNT(*)
                FROM leaks
                WHERE created_at >= CURRENT_DATE - INTERVAL '30 days'
                GROUP BY day
                ORDER BY day
            """)
            leaks_by_day = cursor.fetchall()

            cursor.execute("""
                SELECT resource_name, COUNT(*)
                FROM leaks
                GROUP BY resource_name
                ORDER BY COUNT(*) DESC
                LIMIT 3
            """)
            leaks_by_source = cursor.fetchall()

            cursor.execute("""
                SELECT resource_name, message, created_at
                FROM leaks
                ORDER BY created_at DESC
                LIMIT 10
            """)
            recent_leaks = cursor.fetchall()

        return {
            'total_leaks': total_leaks,
            'leaks_by_day': leaks_by_day,
            'leaks_by_source': leaks_by_source,
            'recent_leaks': recent_leaks
        }
    except Exception as e:
        print(f"Database error: {e}")
        return None
    finally:
        # conn stays None when connect() itself failed.
        if conn is not None:
            conn.close()
@app.route("/")
def index():
    """Render the dashboard from the parser status and leak statistics."""
    parser_status = get_parser_status()
    if not parser_status:
        # get_parser_status() returns None when Docker cannot be queried.
        # Pass a neutral "nothing running, nothing counted" status so the
        # template's key lookups still resolve to concrete values.
        parser_status = {
            'tg-node-0': False,
            'tg-node-1': False,
            'total': 0,
            'active': 0,
            'errors': 0
        }

    leaks_stats = get_leaks_stats()
    if not leaks_stats:
        # NOTE(review): placeholder numbers shown when the database is
        # unavailable - they are not real data; confirm this is wanted
        # outside of demos.
        leaks_stats = {
            'total_leaks': 150,
            'leaks_by_day': [(datetime.now().date(), 5)],
            'leaks_by_source': [('Telegram', 80), ('Форум', 50), ('Даркнет', 20)],
            'recent_leaks': [('Telegram', 'Новая утечка данных', datetime.now())]
        }

    return render_template(
        "index.html",
        parser_status=parser_status,
        leaks_stats=leaks_stats
    )
@app.route("/logs")
def logs():
    """Render the last 100 lines of the log file, skipping blank ones.

    The path comes from the LOG_FILE environment variable and defaults to
    '/app/tg_nodes.log'. When the file is missing or unreadable, a single
    '[ERROR] ...' entry is rendered instead.
    """
    from collections import deque  # local import: only this view needs it

    log_path = os.getenv('LOG_FILE', '/app/tg_nodes.log')
    try:
        # errors='replace' keeps the page usable if the file holds a broken
        # or half-written multi-byte sequence.
        with open(log_path, 'r', encoding='utf-8', errors='replace') as f:
            # A bounded deque keeps only the tail while streaming the file,
            # instead of loading every line into memory first.
            tail = deque(f, maxlen=100)
        parsed_logs = [parse_log_line(line) for line in tail if line.strip()]
    except FileNotFoundError:
        parsed_logs = ["[ERROR] Лог-файл не найден"]
    except Exception as e:
        parsed_logs = [f"[ERROR] Ошибка чтения лог-файла: {str(e)}"]
    return render_template("logs.html", logs=parsed_logs)
if __name__ == "__main__":
    # Listen on all interfaces, port 5000. Only one run() call: a preceding
    # blocking app.run(debug=True) would keep this one from ever starting.
    app.run(host='0.0.0.0', port=5000)

View File

@ -1,5 +1,6 @@
<!DOCTYPE html>
<html lang="ru">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
@ -17,6 +18,7 @@
min-height: 100vh;
overflow: hidden;
}
.sidebar {
width: 250px;
background-color: #2e2e2e;
@ -30,13 +32,16 @@
bottom: 0;
z-index: 1;
}
.sidebar.collapsed {
width: 60px;
}
.sidebar.collapsed h2,
.sidebar.collapsed ul li a span {
display: none;
}
.sidebar h2 {
color: #3399FF;
text-align: center;
@ -44,13 +49,16 @@
font-size: 24px;
transition: opacity 0.3s ease;
}
.sidebar ul {
list-style: none;
padding: 0;
}
.sidebar ul li {
margin: 15px 0;
}
.sidebar ul li a {
color: #ffffff;
text-decoration: none;
@ -59,16 +67,20 @@
display: flex;
align-items: center;
}
.sidebar ul li a:hover {
color: #3399FF;
}
.sidebar ul li a i {
margin-right: 10px;
font-size: 20px;
}
.sidebar ul li a span {
transition: opacity 0.3s ease;
}
.toggle-btn {
position: fixed;
left: 10px;
@ -81,6 +93,7 @@
cursor: pointer;
z-index: 1000;
}
.container {
flex: 1;
padding: 20px;
@ -91,42 +104,50 @@
position: relative;
z-index: 0;
}
.container.collapsed {
margin-left: 60px;
}
h1 {
color: #3399FF;
text-align: center;
margin-bottom: 20px;
}
.grid {
display: grid;
grid-template-columns: repeat(2, 1fr);
gap: 20px;
margin-top: 20px;
}
.card {
background-color: #2e2e2e;
padding: 20px;
border-radius: 8px;
box-shadow: 0 0 5px rgba(0, 0, 0, 0.2);
}
#map {
height: 400px;
border-radius: 8px;
grid-column: span 2;
}
.screenshots {
display: flex;
overflow-x: auto;
gap: 10px;
padding: 10px;
}
.screenshots img {
max-height: 200px;
border-radius: 5px;
border: 2px solid #3399FF;
}
.logs {
background-color: #2e2e2e;
padding: 15px;
@ -137,10 +158,11 @@
font-size: 14px;
color: #3399FF;
}
.logs p {
margin: 5px 0;
}
.leaflet-marker-icon {
background-color: #3399FF;
border: 2px solid #ffffff;
@ -149,10 +171,12 @@
height: 20px !important;
box-shadow: 0 0 10px rgba(51, 153, 255, 0.5);
}
.leaflet-popup-content {
color: #1e1e1e;
font-size: 14px;
}
.leaflet-popup-content-wrapper {
border-radius: 8px;
box-shadow: 0 0 10px rgba(0, 0, 0, 0.3);
@ -160,6 +184,7 @@
</style>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0-beta3/css/all.min.css">
</head>
<body>
<!-- Боковое меню -->
<div class="sidebar" id="sidebar">
@ -196,7 +221,19 @@
</div>
<div class="card">
<h2>Состояние парсеров</h2>
<canvas id="parsersStatusChart"></canvas>
<div class="parsers-status">
<p>TG Node 0:
<span class="status-{% if parser_status['tg-node-0'] %}active{% else %}error{% endif %}">
{% if parser_status['tg-node-0'] %}Активен{% else %}Ошибка{% endif %}
</span>
</p>
<p>TG Node 1:
<span class="status-{% if parser_status['tg-node-1'] %}active{% else %}error{% endif %}">
{% if parser_status['tg-node-1'] %}Активен{% else %}Ошибка{% endif %}
</span>
</p>
<canvas id="parsersStatusChart"></canvas>
</div>
</div>
<div class="card">
<h2>Источники утечек</h2>
@ -229,19 +266,26 @@
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
<script src="https://unpkg.com/leaflet@1.7.1/dist/leaflet.js"></script>
<script>
// График утечек за месяц
const monthlyLeaksCtx = document.getElementById('monthlyLeaksChart').getContext('2d');
new Chart(monthlyLeaksCtx, {
type: 'line',
data: {
labels: ['1', '2', '3', '4', '5', '6', '7', '8', '9', '10', '11', '12', '13', '14', '15'],
labels: [
{% for day in leaks_stats['leaks_by_day'] %}
"{{ day[0].strftime('%d.%m') }}"{% if not loop.last %}, {% endif %}
{% endfor %}
],
datasets: [{
label: 'Утечки',
data: [12, 19, 3, 5, 2, 3, 15, 20, 10, 8, 12, 14, 18, 20, 22],
data: [
{% for day in leaks_stats['leaks_by_day'] %}
{{ day[1] }}{% if not loop.last %}, {% endif %}
{% endfor %}
],
borderColor: '#3399FF',
tension: 0.1,
fill: true,
backgroundColor: 'rgba(51, 153, 255, 0.1)',
backgroundColor: 'rgba(51, 153, 255, 0.1)'
}]
},
options: {
@ -249,7 +293,7 @@
plugins: {
legend: {
labels: {
color: '#fff',
color: '#fff'
}
}
},
@ -257,24 +301,23 @@
y: {
beginAtZero: true,
grid: {
color: '#444',
color: '#444'
},
ticks: {
color: '#fff',
color: '#fff'
}
},
x: {
grid: {
color: '#444',
color: '#444'
},
ticks: {
color: '#fff',
color: '#fff'
}
}
}
}
});
// График состояния парсеров
const parsersStatusCtx = document.getElementById('parsersStatusChart').getContext('2d');
new Chart(parsersStatusCtx, {
@ -283,7 +326,11 @@
labels: ['Активны', 'Ошибки', 'Неактивны'],
datasets: [{
label: 'Состояние',
data: [8, 1, 1],
data: [
{{ parser_status['active'] }},
{{ parser_status['errors'] }},
0 // Неактивных нет в текущей логике
],
backgroundColor: ['#3399FF', '#ff4444', '#666666'],
}]
},
@ -304,10 +351,18 @@
new Chart(sourcesCtx, {
type: 'bar',
data: {
labels: ['Форумы', 'Даркнет', 'Telegram'],
labels: [
    {# tojson emits a correctly quoted and escaped JavaScript string, so quotes or backslashes in a source name cannot break out of the literal #}
    {% for source in leaks_stats['leaks_by_source'] %}
    {{ source[0] | tojson }}{% if not loop.last %},{% endif %}
    {% endfor %}
],
datasets: [{
label: 'Количество утечек',
data: [45, 30, 25],
data: [
{% for source in leaks_stats['leaks_by_source'] %}
{{ source[1] }}{% if not loop.last %},{% endif %}
{% endfor %}
],
backgroundColor: ['#3399FF', '#00cc99', '#0099cc'],
}]
},
@ -365,4 +420,5 @@
});
</script>
</body>
</html>

View File

@ -218,15 +218,13 @@
<!-- Контейнер для логов -->
<div class="logs-container">
<h2>Логи системы</h2>
<div class="logs">
<p>[2023-10-01 12:34] Парсер форума запущен.</p>
<p>[2023-10-01 12:35] Найдена новая утечка на форуме X.</p>
<p>[2023-10-01 12:36] Ошибка парсера Telegram: timeout.</p>
<p>[2023-10-01 12:37] Утечка данных в Москве зафиксирована.</p>
<p>[2023-10-01 12:38] Парсер даркнета завершил работу.</p>
<p>[2023-10-01 12:39] Новый скриншот добавлен в базу.</p>
<!-- Добавьте больше логов по необходимости -->
<h2>Логи системы (tg_nodes.log)</h2>
<div class="logs" id="logs-container">
{% for log in logs %}
<p>{{ log }}</p>
{% else %}
<p>Нет записей в логах</p>
{% endfor %}
</div>
</div>
</div>