Compare commits

...

6 Commits
main ... main

17 changed files with 1212 additions and 432 deletions

6
.gitignore vendored
View File

@ -1,3 +1,7 @@
.env
*.log
**/__pycache__/
**/__pycache__/
/web/static/*.mp4
/celerybeat-schedule.bak
/celerybeat-schedule.dat
/celerybeat-schedule.dir

6
Dockerfile Normal file
View File

@ -0,0 +1,6 @@
FROM python:3.12-slim
WORKDIR /app
COPY . /app
RUN pip install --no-cache-dir -r requirements.txt
EXPOSE 5000
CMD ["python", "web/app.py"]

37
celery_app.py Normal file
View File

@ -0,0 +1,37 @@
from celery import Celery
from kombu import Queue
app = Celery(
'leak_monitor',
broker='memory://localhost',
# backend='rpc://',
include=['tasks.tg_crawler', 'tasks.forum_crawler']
)
app.conf.update(
worker_pool='solo',
worker_max_tasks_per_child=100,
task_serializer='json',
result_serializer='json',
accept_content=['json'],
timezone='Europe/Moscow',
enable_utc=True,
)
app.conf.task_queues = (
Queue('telegram', routing_key='telegram.#'),
Queue('forum', routing_key='forum.#'),
)
app.conf.beat_schedule = {
'monitor-telegram-channels': {
'task': 'tasks.tg_crawler.monitor_channel',
'schedule': 10.0,
'options': {'queue': 'telegram'}
},
'crawl-forum': {
'task': 'forum_crawler.crawl_forum_task',
'schedule': 3600.0 * 24, # час
'args': ('https://thehackernews.com/search/label/data%20breach', [])
},
}

19
celery_beat.py Normal file
View File

@ -0,0 +1,19 @@
from celery_app import app
from utils.logg import LoggerSingleton
logger = LoggerSingleton.get_logger()
if __name__ == '__main__':
logger.info("Starting Celery beat scheduler...")
try:
app.loader.import_default_modules()
beat = app.Beat(
logfile=None,
loglevel='info',
socket_timeout=30
)
beat.run()
except Exception as e:
logger.error(f"Beat failed: {e}")
raise

47
docker-compose.yaml Normal file
View File

@ -0,0 +1,47 @@
version: '3.8'
services:
web:
build: .
container_name: tg-monitor
ports:
- "5000:5000"
volumes:
- ./web:/app/web
- /var/run/docker.sock:/var/run/docker.sock
- type: bind
source: ./tg_nodes.log
target: /app/tg_nodes.log
read_only: true
environment:
- PYTHONUNBUFFERED=1
privileged: true
restart: unless-stopped
command: python web/app.py
tg_node_0:
build: .
volumes:
- type: bind
source: ./tg_nodes.log
target: /app/tg_nodes.log
environment:
- PYTHONUNBUFFERED=1
container_name: tg-node-0
restart: unless-stopped
command: python tg/tg_node_0.py
tg_node_1:
build: .
volumes:
- type: bind
source: ./tg_nodes.log
target: /app/tg_nodes.log
environment:
- PYTHONUNBUFFERED=1
container_name: tg-node-1
restart: unless-stopped
command: python tg/tg_node_1.py
volumes:
pg_data:

View File

@ -27,34 +27,30 @@ class DriverCreator:
).create_extension()
def get_driver(self):
'''
Отключает JS
Каждый запрос получает `сырой` html
'''
# extension_path = self._switch_proxy()
options = uc.ChromeOptions()
# options.add_argument(f"--load-extension={extension_path}") # временно
options.add_argument("--headless=new")
# options.add_argument("--headless=new")
options.add_argument("--disable-gpu")
options.add_argument("--disable-dev-shm-usage")
options.add_argument("--no-sandbox")
options.add_argument("--disable-webgl")
options.add_argument("--disable-software-rasterizer")
# options.add_argument("--disable-webgl")
# options.add_argument("--disable-software-rasterizer")
# options.add_argument("--disable-extensions")
prefs = {"profile.managed_default_content_settings.javascript": 2}
options.experimental_options["prefs"] = prefs
# prefs = {"profile.managed_default_content_settings.javascript": 2}
# options.experimental_options["prefs"] = prefs
driver = uc.Chrome(
options=options,
version_main=132,
version_main=135,
# user_multi_procs=True
)
driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
"source": "Object.defineProperty(navigator, 'javaEnabled', {get: () => false});"
})
driver.execute_cdp_cmd("Emulation.setScriptExecutionDisabled", {"value": True})
# driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
# "source": "Object.defineProperty(navigator, 'javaEnabled', {get: () => false});"
# })
# driver.execute_cdp_cmd("Emulation.setScriptExecutionDisabled", {"value": True})
return driver

28
initiator.py Normal file
View File

@ -0,0 +1,28 @@
from celery_app import app
from utils.logg import LoggerSingleton
logger = LoggerSingleton.get_logger()
# Брокер центральный узел, который:
# Принимает задачи от beat (планировщика)
# Распределяет их по очередям (telegram, forum).
# Передаёт задачи воркерам, которые подписаны на эти очереди.
def main():
try:
logger.info("Starting Celery worker...")
app.worker_main(
argv=[
'worker',
'--loglevel=debug',
'--pool=solo',
'-Q', 'telegram',
'--without-heartbeat',
'--without-gossip'
]
)
except Exception as e:
logger.error(f"Failed to start worker: {e}")
if __name__ == '__main__':
main()

View File

@ -1,402 +0,0 @@
#!/usr/bin/env python3
# this module is part of undetected_chromedriver
from distutils.version import LooseVersion
import io
import json
import logging
import os
import pathlib
import platform
import random
import re
import shutil
import string
import sys
import time
from urllib.request import urlopen
from urllib.request import urlretrieve
import zipfile
from multiprocessing import Lock
import secrets
logger = logging.getLogger(__name__)
IS_POSIX = sys.platform.startswith(("darwin", "cygwin", "linux", "linux2"))
class Patcher(object):
lock = Lock()
exe_name = "chromedriver%s"
platform = sys.platform
if platform.endswith("win32"):
d = "~/appdata/roaming/undetected_chromedriver"
elif "LAMBDA_TASK_ROOT" in os.environ:
d = "/tmp/undetected_chromedriver"
elif platform.startswith(("linux", "linux2")):
d = "~/.local/share/undetected_chromedriver"
elif platform.endswith("darwin"):
d = "~/Library/Application Support/undetected_chromedriver"
else:
d = "~/.undetected_chromedriver"
data_path = os.path.abspath(os.path.expanduser(d))
def __init__(
self,
executable_path=None,
force=False,
version_main: int = 0,
user_multi_procs=False,
):
"""
Args:
executable_path: None = automatic
a full file path to the chromedriver executable
force: False
terminate processes which are holding lock
version_main: 0 = auto
specify main chrome version (rounded, ex: 82)
"""
self.force = force
self._custom_exe_path = False
prefix = secrets.token_hex(8)
self.user_multi_procs = user_multi_procs
self.is_old_chromedriver = version_main and version_main <= 114
# Needs to be called before self.exe_name is accessed
self._set_platform_name()
if not os.path.exists(self.data_path):
os.makedirs(self.data_path, exist_ok=True)
if not executable_path:
self.executable_path = os.path.join(
self.data_path, "_".join([prefix, self.exe_name])
)
if not IS_POSIX:
if executable_path:
if not executable_path[-4:] == ".exe":
executable_path += ".exe"
self.zip_path = os.path.join(self.data_path, prefix)
if not executable_path:
if not self.user_multi_procs:
self.executable_path = os.path.abspath(
os.path.join(".", self.executable_path)
)
if executable_path:
self._custom_exe_path = True
self.executable_path = executable_path
# Set the correct repository to download the Chromedriver from
if self.is_old_chromedriver:
self.url_repo = "https://chromedriver.storage.googleapis.com"
else:
self.url_repo = "https://googlechromelabs.github.io/chrome-for-testing"
self.version_main = version_main
self.version_full = None
def _set_platform_name(self):
"""
Set the platform and exe name based on the platform undetected_chromedriver is running on
in order to download the correct chromedriver.
"""
if self.platform.endswith("win32"):
self.platform_name = "win32"
self.exe_name %= ".exe"
if self.platform.endswith(("linux", "linux2")):
self.platform_name = "linux64"
self.exe_name %= ""
if self.platform.endswith("darwin"):
if self.is_old_chromedriver:
self.platform_name = "mac64"
else:
self.platform_name = "mac-x64"
self.exe_name %= ""
def auto(self, executable_path=None, force=False, version_main=None, _=None):
"""
Args:
executable_path:
force:
version_main:
Returns:
"""
p = pathlib.Path(self.data_path)
if self.user_multi_procs:
with Lock():
files = list(p.rglob("*chromedriver*"))
most_recent = max(files, key=lambda f: f.stat().st_mtime)
files.remove(most_recent)
list(map(lambda f: f.unlink(), files))
if self.is_binary_patched(most_recent):
self.executable_path = str(most_recent)
return True
if executable_path:
self.executable_path = executable_path
self._custom_exe_path = True
if self._custom_exe_path:
ispatched = self.is_binary_patched(self.executable_path)
if not ispatched:
return self.patch_exe()
else:
return
if version_main:
self.version_main = version_main
if force is True:
self.force = force
try:
os.unlink(self.executable_path)
except PermissionError:
if self.force:
self.force_kill_instances(self.executable_path)
return self.auto(force=not self.force)
try:
if self.is_binary_patched():
# assumes already running AND patched
return True
except PermissionError:
pass
# return False
except FileNotFoundError:
pass
release = self.fetch_release_number()
self.version_main = release.version[0]
self.version_full = release
self.unzip_package(self.fetch_package())
return self.patch()
def driver_binary_in_use(self, path: str = None) -> bool:
"""
naive test to check if a found chromedriver binary is
currently in use
Args:
path: a string or PathLike object to the binary to check.
if not specified, we check use this object's executable_path
"""
if not path:
path = self.executable_path
p = pathlib.Path(path)
if not p.exists():
raise OSError("file does not exist: %s" % p)
try:
with open(p, mode="a+b") as fs:
exc = []
try:
fs.seek(0, 0)
except PermissionError as e:
exc.append(e) # since some systems apprently allow seeking
# we conduct another test
try:
fs.readline()
except PermissionError as e:
exc.append(e)
if exc:
return True
return False
# ok safe to assume this is in use
except Exception as e:
# logger.exception("whoops ", e)
pass
def cleanup_unused_files(self):
p = pathlib.Path(self.data_path)
items = list(p.glob("*undetected*"))
for item in items:
try:
item.unlink()
except:
pass
def patch(self):
self.patch_exe()
return self.is_binary_patched()
def fetch_release_number(self):
"""
Gets the latest major version available, or the latest major version of self.target_version if set explicitly.
:return: version string
:rtype: LooseVersion
"""
# Endpoint for old versions of Chromedriver (114 and below)
if self.is_old_chromedriver:
path = f"/latest_release_{self.version_main}"
path = path.upper()
logger.debug("getting release number from %s" % path)
return LooseVersion(urlopen(self.url_repo + path).read().decode())
# Endpoint for new versions of Chromedriver (115+)
if not self.version_main:
# Fetch the latest version
path = "/last-known-good-versions-with-downloads.json"
logger.debug("getting release number from %s" % path)
with urlopen(self.url_repo + path) as conn:
response = conn.read().decode()
last_versions = json.loads(response)
return LooseVersion(last_versions["channels"]["Stable"]["version"])
# Fetch the latest minor version of the major version provided
path = "/latest-versions-per-milestone-with-downloads.json"
logger.debug("getting release number from %s" % path)
with urlopen(self.url_repo + path) as conn:
response = conn.read().decode()
major_versions = json.loads(response)
return LooseVersion(major_versions["milestones"][str(self.version_main)]["version"])
def parse_exe_version(self):
with io.open(self.executable_path, "rb") as f:
for line in iter(lambda: f.readline(), b""):
match = re.search(rb"platform_handle\x00content\x00([0-9.]*)", line)
if match:
return LooseVersion(match[1].decode())
def fetch_package(self):
"""
Downloads ChromeDriver from source
:return: path to downloaded file
"""
zip_name = f"chromedriver_{self.platform_name}.zip"
if self.is_old_chromedriver:
download_url = "%s/%s/%s" % (self.url_repo, self.version_full.vstring, zip_name)
else:
zip_name = zip_name.replace("_", "-", 1)
download_url = "https://storage.googleapis.com/chrome-for-testing-public/%s/%s/%s"
download_url %= (self.version_full.vstring, self.platform_name, zip_name)
logger.debug("downloading from %s" % download_url)
return urlretrieve(download_url)[0]
def unzip_package(self, fp):
"""
Does what it says
:return: path to unpacked executable
"""
exe_path = self.exe_name
if not self.is_old_chromedriver:
# The new chromedriver unzips into its own folder
zip_name = f"chromedriver-{self.platform_name}"
exe_path = os.path.join(zip_name, self.exe_name)
logger.debug("unzipping %s" % fp)
try:
os.unlink(self.zip_path)
except (FileNotFoundError, OSError):
pass
os.makedirs(self.zip_path, mode=0o755, exist_ok=True)
with zipfile.ZipFile(fp, mode="r") as zf:
zf.extractall(self.zip_path)
os.rename(os.path.join(self.zip_path, exe_path), self.executable_path)
os.remove(fp)
shutil.rmtree(self.zip_path)
os.chmod(self.executable_path, 0o755)
return self.executable_path
@staticmethod
def force_kill_instances(exe_name):
"""
kills running instances.
:param: executable name to kill, may be a path as well
:return: True on success else False
"""
exe_name = os.path.basename(exe_name)
if IS_POSIX:
r = os.system("kill -f -9 $(pidof %s)" % exe_name)
else:
r = os.system("taskkill /f /im %s" % exe_name)
return not r
@staticmethod
def gen_random_cdc():
cdc = random.choices(string.ascii_letters, k=27)
return "".join(cdc).encode()
def is_binary_patched(self, executable_path=None):
executable_path = executable_path or self.executable_path
try:
with io.open(executable_path, "rb") as fh:
return fh.read().find(b"undetected chromedriver") != -1
except FileNotFoundError:
return False
def patch_exe(self):
start = time.perf_counter()
logger.info("patching driver executable %s" % self.executable_path)
with io.open(self.executable_path, "r+b") as fh:
content = fh.read()
# match_injected_codeblock = re.search(rb"{window.*;}", content)
match_injected_codeblock = re.search(rb"\{window\.cdc.*?;\}", content)
if match_injected_codeblock:
target_bytes = match_injected_codeblock[0]
new_target_bytes = (
b'{console.log("undetected chromedriver 1337!")}'.ljust(
len(target_bytes), b" "
)
)
new_content = content.replace(target_bytes, new_target_bytes)
if new_content == content:
logger.warning(
"something went wrong patching the driver binary. could not find injection code block"
)
else:
logger.debug(
"found block:\n%s\nreplacing with:\n%s"
% (target_bytes, new_target_bytes)
)
fh.seek(0)
fh.write(new_content)
logger.debug(
"patching took us {:.2f} seconds".format(time.perf_counter() - start)
)
def __repr__(self):
return "{0:s}({1:s})".format(
self.__class__.__name__,
self.executable_path,
)
def __del__(self):
if self._custom_exe_path:
# if the driver binary is specified by user
# we assume it is important enough to not delete it
return
else:
timeout = 3 # stop trying after this many seconds
t = time.monotonic()
now = lambda: time.monotonic()
while now() - t > timeout:
# we don't want to wait until the end of time
try:
if self.user_multi_procs:
break
os.unlink(self.executable_path)
logger.debug("successfully unlinked %s" % self.executable_path)
break
except (OSError, RuntimeError, PermissionError):
time.sleep(0.01)
continue
except FileNotFoundError:
break

View File

@ -4,4 +4,12 @@ python-dotenv
tls-client
undetected-chromedriver
selenium
grpc_interceptor_headers
grpc_interceptor_headers
telethon
schedule
psycopg2-binary
docker
asyncio
pytz
flask
apscheduler

View File

@ -1,14 +0,0 @@
#!/bin/bash
TARGET_DIR=$(python -c "import undetected_chromedriver as uc; print(uc.__path__[0])" 2>/dev/null)
if [ -d "$TARGET_DIR" ]; then
echo "patcher.py в $TARGET_DIR"
cp patcher.py "$TARGET_DIR"
else
echo "undetected_chromedriver не найден"
fi
pip install -r requirements.txt
python initiator.py

0
tasks/__init__.py Normal file
View File

97
tasks/forum_crawler.py Normal file
View File

@ -0,0 +1,97 @@
from bs4 import BeautifulSoup
import os
import psycopg2
from dotenv import load_dotenv
from celery import Celery
from utils.logg import LoggerSingleton
from driver.driver_creator import DriverCreator
from selenium.webdriver.common.by import By
from time import sleep
from celery_app import app
load_dotenv()
logger = LoggerSingleton.get_logger()
class ForumCrawler:
def __init__(self, forum_url, proxy_list):
self.forum_url = forum_url
self.proxy_list = proxy_list
self.db_config = {
'host': os.getenv('DB_HOST'),
'port': os.getenv('DB_PORT'),
'database': os.getenv('DB_NAME'),
'user': os.getenv('DB_USER'),
'password': os.getenv('DB_PASSWORD')
}
self.conn = None
self._connect()
def _connect(self):
try:
self.conn = psycopg2.connect(**self.db_config)
logger.info("Database connection established")
except Exception as e:
logger.error(f"Database connection error: {e}")
raise
def _close(self):
if self.conn and not self.conn.closed:
try:
self.conn.close()
logger.info("Database connection closed")
except Exception as e:
logger.error(f"Error closing database connection: {e}")
def _crawl_leaks(self):
driver_creator = DriverCreator(self.proxy_list)
driver = driver_creator.get_driver()
try:
logger.info(f"Starting forum crawl: {self.forum_url}")
driver.get(self.forum_url)
sleep(2)
posts = driver.find_elements(By.CSS_SELECTOR, 'a.story-link h2.home-title')
if not posts:
logger.info("No posts found on the page")
return
last_post = posts[0]
title = last_post.text.strip()
link = last_post.get_attribute('href')
post_content = f"{title} - {link}"
if 'data breach' in title.lower() or 'leak' in title.lower():
self._save_to_db('Hacker News', post_content)
logger.info(f"New leak found: {title} - {link}")
else:
logger.info("Last post is not about leaks")
except Exception as e:
logger.error(f"Error during forum crawling: {e}")
raise
finally:
driver.quit()
logger.info("WebDriver session closed")
def _save_to_db(self, source, message):
if not self.conn or self.conn.closed:
self._connect()
try:
with self.conn.cursor() as cur:
cur.execute(
"INSERT INTO leaks (resource_name, message) VALUES (%s, %s)",
(source, message)
)
self.conn.commit()
logger.info(f"Leak from {source} saved in the database")
except Exception as e:
logger.error(f"Error writing to the database: {e}")
@app.task(name='forum_crawler.crawl_forum_task')
def crawl_forum_task(forum_url, proxy_list):
crawler = ForumCrawler(forum_url, proxy_list)
try:
crawler._crawl_leaks()
finally:
crawler._close()

121
tasks/tg_crawler.py Normal file
View File

@ -0,0 +1,121 @@
import logging
import os
import asyncio
import psycopg2
from datetime import datetime
from telethon.sync import TelegramClient
from telethon.tl.functions.messages import GetHistoryRequest
from dotenv import load_dotenv
from celery import Celery
from utils.logg import LoggerSingleton
from celery_app import app
load_dotenv()
logger = LoggerSingleton.get_logger()
class TelegramMonitor:
def __init__(self):
self.db_config = {
'host': os.getenv('DB_HOST'),
'port': os.getenv('DB_PORT'),
'database': os.getenv('DB_NAME'),
'user': os.getenv('DB_USER'),
'password': os.getenv('DB_PASSWORD')
}
self.conn = None
self._connect()
def _connect(self):
try:
self.conn = psycopg2.connect(**self.db_config)
logger.info("Database connection established")
except Exception as e:
logger.error(f"Database connection error: {e}")
raise
def _close(self):
if self.conn and not self.conn.closed:
try:
self.conn.close()
logger.info("Database connection closed")
except Exception as e:
logger.error(f"Error closing database connection: {e}")
def _fetch_post(self, channel_username, source_name):
try:
with TelegramClient('session', os.getenv('API_ID'), os.getenv('API_HASH')) as client:
entity = client.get_entity(channel_username)
history = client(GetHistoryRequest(
peer=entity,
limit=1,
offset_date=None,
offset_id=0,
max_id=0,
min_id=0,
add_offset=0,
hash=0
))
if history.messages:
self._save_to_db(source_name, history.messages[0].message)
except Exception as e:
logger.error(f"Error fetching post from {source_name}: {e}")
raise
def _save_to_db(self, source, message):
if not self.conn or self.conn.closed:
self._connect()
try:
with self.conn.cursor() as cur:
cur.execute(
"INSERT INTO leaks (resource_name, message) VALUES (%s, %s)",
(source, message)
)
self.conn.commit()
logger.info(f"Data from {source} saved successfully")
except Exception as e:
logger.error(f"Database save error for {source}: {e}")
self.conn.rollback()
raise
@app.task(bind=True, name='tasks.tg_crawler.monitor_channels')
def monitor_channels(self):
channels = [
('trueosint', 'trueosint'),
('dataleak', 'dataleak'),
('Vaultofdataleaksss', 'Vaultofdataleaksss')
]
logger.info("Starting Telegram channels monitoring")
for channel, source in channels:
try:
monitor_channel.apply_async(
args=(channel, source),
queue='telegram',
retry=True,
retry_policy={
'max_retries': 3,
'interval_start': 2,
'interval_step': 5,
'interval_max': 20
}
)
logger.info(f"Sent task for channel: {channel}")
except Exception as e:
logger.error(f"Failed to send task for {channel}: {e}")
raise self.retry(exc=e)
@app.task(bind=True, name='tasks.tg_crawler.monitor_channel')
def monitor_channel(self, channel, source):
logger.info(f"Starting monitoring channel: {channel}")
monitor = TelegramMonitor()
try:
monitor._fetch_post(channel, source)
logger.info(f"Successfully monitored channel: {channel}")
except Exception as e:
logger.error(f"Channel monitoring failed for {channel}: {e}")
raise self.retry(exc=e)
finally:
monitor._close()

27
utils/logg.py Normal file
View File

@ -0,0 +1,27 @@
import logging
class LoggerSingleton:
_logger = None
@staticmethod
def get_logger():
if LoggerSingleton._logger is None:
LoggerSingleton._logger = logging.getLogger(__name__)
LoggerSingleton._logger.setLevel(logging.INFO)
formatter = logging.Formatter(
"%(asctime)s - %(name)s - %(levelname)s - %(message)s",
datefmt="%Y-%m-%d %H:%M:%S"
)
console_handler = logging.StreamHandler()
console_handler.setFormatter(formatter)
file_handler = logging.FileHandler('tempora.log', encoding='utf-8')
file_handler.setFormatter(formatter)
LoggerSingleton._logger.addHandler(console_handler)
LoggerSingleton._logger.addHandler(file_handler)
return LoggerSingleton._logger

141
web/app.py Normal file
View File

@ -0,0 +1,141 @@
import os
from flask import Flask, render_template
import psycopg2
from datetime import datetime
import docker
app = Flask(__name__)
docker_client = docker.from_env()
DB_CONFIG = {
'host': os.getenv("HOST"),
'port': os.getenv("PORT"),
'database': os.getenv("DBNAME"),
'user': os.getenv("USER"),
'password': os.getenv("PASSWORD")
}
def parse_log_line(line):
"""Парсит строку лога в формате: [timestamp] сообщение"""
try:
line = line.strip()
if line.startswith('[') and ']' in line:
return line
return f"[{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}] {line}"
except:
return line
def get_parser_status():
"""Получает статус парсеров из Docker"""
try:
containers = docker_client.containers.list(all=True)
status = {
'tg-node-0': False,
'tg-node-1': False,
'total': 0,
'active': 0,
'errors': 0
}
for container in containers:
if container.name == 'tg-node-0':
status['tg-node-0'] = container.status == 'running'
elif container.name == 'tg-node-1':
status['tg-node-1'] = container.status == 'running'
status['active'] = sum([status['tg-node-0'], status['tg-node-1']])
status['errors'] = 2 - status['active'] # Просто для примера
status['total'] = 2
return status
except Exception as e:
print(f"Error getting docker status: {e}")
return None
def get_leaks_stats():
"""Получает статистику по утечкам из базы данных"""
try:
conn = psycopg2.connect(**DB_CONFIG)
cursor = conn.cursor()
cursor.execute("SELECT COUNT(*) FROM leaks")
total_leaks = cursor.fetchone()[0]
cursor.execute("""
SELECT DATE(created_at) as day, COUNT(*)
FROM leaks
WHERE created_at >= CURRENT_DATE - INTERVAL '30 days'
GROUP BY day
ORDER BY day
""")
leaks_by_day = cursor.fetchall()
cursor.execute("""
SELECT resource_name, COUNT(*)
FROM leaks
GROUP BY resource_name
ORDER BY COUNT(*) DESC
LIMIT 3
""")
leaks_by_source = cursor.fetchall()
cursor.execute("""
SELECT resource_name, message, created_at
FROM leaks
ORDER BY created_at DESC
LIMIT 10
""")
recent_leaks = cursor.fetchall()
return {
'total_leaks': total_leaks,
'leaks_by_day': leaks_by_day,
'leaks_by_source': leaks_by_source,
'recent_leaks': recent_leaks
}
except Exception as e:
print(f"Database error: {e}")
return None
finally:
if 'conn' in locals():
conn.close()
@app.route("/")
@app.route("/index.html")
def index():
parser_status = get_parser_status()
leaks_stats = get_leaks_stats()
if not leaks_stats:
leaks_stats = {
'total_leaks': 150,
'leaks_by_day': [(datetime.now().date(), 5)],
'leaks_by_source': [('Telegram', 80), ('Форум', 50), ('Даркнет', 20)],
'recent_leaks': [('Telegram', 'Новая утечка данных', datetime.now())]
}
return render_template(
"index.html",
parser_status=parser_status,
leaks_stats=leaks_stats
)
@app.route("/logs.html")
@app.route("/logs")
def logs():
log_path = '/app/tg_nodes.log'
try:
with open(log_path, 'r', encoding='utf-8') as f:
lines = f.readlines()[-100:]
parsed_logs = [parse_log_line(line) for line in lines if line.strip()]
except FileNotFoundError:
parsed_logs = ["[ERROR] Лог-файл не найден"]
except Exception as e:
parsed_logs = [f"[ERROR] Ошибка чтения лог-файла: {str(e)}"]
return render_template("logs.html", logs=parsed_logs)
if __name__ == "__main__":
app.run(host='0.0.0.0', port=5000)

424
web/templates/index.html Normal file
View File

@ -0,0 +1,424 @@
<!DOCTYPE html>
<html lang="ru">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Система мониторинга утечек данных</title>
<link rel="stylesheet" href="https://unpkg.com/leaflet@1.7.1/dist/leaflet.css" />
<style>
body {
background-color: #1e1e1e;
color: #ffffff;
font-family: 'Arial', sans-serif;
margin: 0;
padding: 0;
display: flex;
transition: margin-left 0.3s ease;
min-height: 100vh;
overflow: hidden;
}
.sidebar {
width: 250px;
background-color: #2e2e2e;
padding: 20px;
box-shadow: 2px 0 5px rgba(0, 0, 0, 0.3);
transition: width 0.3s ease;
overflow-y: auto;
position: fixed;
top: 0;
left: 0;
bottom: 0;
z-index: 1;
}
.sidebar.collapsed {
width: 60px;
}
.sidebar.collapsed h2,
.sidebar.collapsed ul li a span {
display: none;
}
.sidebar h2 {
color: #3399FF;
text-align: center;
margin-bottom: 20px;
font-size: 24px;
transition: opacity 0.3s ease;
}
.sidebar ul {
list-style: none;
padding: 0;
}
.sidebar ul li {
margin: 15px 0;
}
.sidebar ul li a {
color: #ffffff;
text-decoration: none;
font-size: 16px;
transition: color 0.3s ease;
display: flex;
align-items: center;
}
.sidebar ul li a:hover {
color: #3399FF;
}
.sidebar ul li a i {
margin-right: 10px;
font-size: 20px;
}
.sidebar ul li a span {
transition: opacity 0.3s ease;
}
.toggle-btn {
position: fixed;
left: 10px;
top: 10px;
background-color: #3399FF;
border: none;
color: #fff;
padding: 10px;
border-radius: 5px;
cursor: pointer;
z-index: 1000;
}
.container {
flex: 1;
padding: 20px;
margin-left: 250px;
transition: margin-left 0.3s ease;
overflow-y: auto;
height: 100vh;
position: relative;
z-index: 0;
}
.container.collapsed {
margin-left: 60px;
}
h1 {
color: #3399FF;
text-align: center;
margin-bottom: 20px;
}
.grid {
display: grid;
grid-template-columns: repeat(2, 1fr);
gap: 20px;
margin-top: 20px;
}
.card {
background-color: #2e2e2e;
padding: 20px;
border-radius: 8px;
box-shadow: 0 0 5px rgba(0, 0, 0, 0.2);
}
#map {
height: 400px;
border-radius: 8px;
grid-column: span 2;
}
.screenshots {
display: flex;
overflow-x: auto;
gap: 10px;
padding: 10px;
}
.screenshots img {
max-height: 200px;
border-radius: 5px;
border: 2px solid #3399FF;
}
.logs {
background-color: #2e2e2e;
padding: 15px;
border-radius: 8px;
height: 200px;
overflow-y: auto;
font-family: 'Courier New', monospace;
font-size: 14px;
color: #3399FF;
}
.logs p {
margin: 5px 0;
}
.leaflet-marker-icon {
background-color: #3399FF;
border: 2px solid #ffffff;
border-radius: 50%;
width: 20px !important;
height: 20px !important;
box-shadow: 0 0 10px rgba(51, 153, 255, 0.5);
}
.leaflet-popup-content {
color: #1e1e1e;
font-size: 14px;
}
.leaflet-popup-content-wrapper {
border-radius: 8px;
box-shadow: 0 0 10px rgba(0, 0, 0, 0.3);
}
</style>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0-beta3/css/all.min.css">
</head>
<body>
<!-- Боковое меню -->
<div class="sidebar" id="sidebar">
<h2>Меню</h2>
<ul>
<li><a href="#"><i class="fas fa-home"></i><span>Главная</span></a></li>
<li><a href="#"><i class="fas fa-cog"></i><span>Настройки</span></a></li>
<li><a href="#"><i class="fas fa-chart-line"></i><span>Отчеты</span></a></li>
<li><a href="#"><i class="fas fa-code"></i><span>Парсеры</span></a></li>
<li><a href="logs.html"><i class="fas fa-file-alt"></i><span>Логи</span></a></li>
</ul>
</div>
<!-- Кнопка для сворачивания/разворачивания меню -->
<button class="toggle-btn" id="toggle-btn">
<i class="fas fa-bars"></i>
</button>
<!-- Основной контент -->
<div class="container" id="main-content">
<h1>Система мониторинга утечек данных</h1>
<!-- Карта -->
<div class="card">
<h2>Карта утечек</h2>
<div id="map"></div>
</div>
<!-- Графики, скриншоты и логи -->
<div class="grid">
<div class="card">
<h2>Утечки за месяц</h2>
<canvas id="monthlyLeaksChart"></canvas>
</div>
<div class="card">
<h2>Состояние парсеров</h2>
<div class="parsers-status">
<p>TG Node 0:
<span class="status-{% if parser_status['tg-node-0'] %}active{% else %}error{% endif %}">
{% if parser_status['tg-node-0'] %}Активен{% else %}Ошибка{% endif %}
</span>
</p>
<p>TG Node 1:
<span class="status-{% if parser_status['tg-node-1'] %}active{% else %}error{% endif %}">
{% if parser_status['tg-node-1'] %}Активен{% else %}Ошибка{% endif %}
</span>
</p>
<canvas id="parsersStatusChart"></canvas>
</div>
</div>
<div class="card">
<h2>Источники утечек</h2>
<canvas id="sourcesChart"></canvas>
</div>
<div class="card">
<h2>Скриншоты с форумов</h2>
<div class="screenshots">
<img src="https://via.placeholder.com/300x200.png?text=Скриншот+1" alt="Скриншот 1">
<img src="https://via.placeholder.com/300x200.png?text=Скриншот+2" alt="Скриншот 2">
<img src="https://via.placeholder.com/300x200.png?text=Скриншот+3" alt="Скриншот 3">
<img src="https://via.placeholder.com/300x200.png?text=Скриншот+4" alt="Скриншот 4">
<img src="https://via.placeholder.com/300x200.png?text=Скриншот+5" alt="Скриншот 5">
</div>
</div>
<div class="card" style="grid-column: span 2;">
<h2>Логи системы</h2>
<div class="logs">
<p>[2023-10-01 12:34] Парсер форума запущен.</p>
<p>[2023-10-01 12:35] Найдена новая утечка на форуме X.</p>
<p>[2023-10-01 12:36] Ошибка парсера Telegram: timeout.</p>
<p>[2023-10-01 12:37] Утечка данных в Москве зафиксирована.</p>
<p>[2023-10-01 12:38] Парсер даркнета завершил работу.</p>
<p>[2023-10-01 12:39] Новый скриншот добавлен в базу.</p>
</div>
</div>
</div>
</div>
<script src="https://cdn.jsdelivr.net/npm/chart.js"></script>
<script src="https://unpkg.com/leaflet@1.7.1/dist/leaflet.js"></script>
<script>
const monthlyLeaksCtx = document.getElementById('monthlyLeaksChart').getContext('2d');
new Chart(monthlyLeaksCtx, {
type: 'line',
data: {
labels: [
{% for day in leaks_stats['leaks_by_day'] %}
"{{ day[0].strftime('%d.%m') }}"{% if not loop.last %}, {% endif %}
{% endfor %}
],
datasets: [{
label: 'Утечки',
data: [
{% for day in leaks_stats['leaks_by_day'] %}
{{ day[1] }}{% if not loop.last %}, {% endif %}
{% endfor %}
],
borderColor: '#3399FF',
tension: 0.1,
fill: true,
backgroundColor: 'rgba(51, 153, 255, 0.1)'
}]
},
options: {
responsive: true,
plugins: {
legend: {
labels: {
color: '#fff'
}
}
},
scales: {
y: {
beginAtZero: true,
grid: {
color: '#444'
},
ticks: {
color: '#fff'
}
},
x: {
grid: {
color: '#444'
},
ticks: {
color: '#fff'
}
}
}
}
});
// График состояния парсеров
const parsersStatusCtx = document.getElementById('parsersStatusChart').getContext('2d');
new Chart(parsersStatusCtx, {
type: 'doughnut',
data: {
labels: ['Активны', 'Ошибки', 'Неактивны'],
datasets: [{
label: 'Состояние',
data: [
{{ parser_status['active'] }},
{{ parser_status['errors'] }},
0 // Неактивных нет в текущей логике
],
backgroundColor: ['#3399FF', '#ff4444', '#666666'],
}]
},
options: {
responsive: true,
plugins: {
legend: {
labels: {
color: '#fff',
}
}
}
}
});
// График источников утечек
const sourcesCtx = document.getElementById('sourcesChart').getContext('2d');
new Chart(sourcesCtx, {
type: 'bar',
data: {
labels: [
{% for source in leaks_stats['leaks_by_source'] %}
'{{ source[0] }}'{% if not loop.last %},{% endif %}
{% endfor %}
],
datasets: [{
label: 'Количество утечек',
data: [
{% for source in leaks_stats['leaks_by_source'] %}
{{ source[1] }}{% if not loop.last %},{% endif %}
{% endfor %}
],
backgroundColor: ['#3399FF', '#00cc99', '#0099cc'],
}]
},
options: {
responsive: true,
plugins: {
legend: {
labels: {
color: '#fff',
}
}
},
scales: {
y: {
beginAtZero: true,
grid: {
color: '#444',
},
ticks: {
color: '#fff',
}
},
x: {
grid: {
color: '#444',
},
ticks: {
color: '#fff',
}
}
}
}
});
const map = L.map('map').setView([55.7558, 37.6176], 5);
L.tileLayer('https://{s}.basemaps.cartocdn.com/dark_all/{z}/{x}/{y}{r}.png', {
attribution: '© OpenStreetMap contributors, © CARTO'
}).addTo(map);
const marker1 = L.marker([55.7558, 37.6176], {
icon: L.divIcon({ className: 'leaflet-marker-icon' })
}).addTo(map).bindPopup('Утечка данных в Москве');
const marker2 = L.marker([59.9343, 30.3351], {
icon: L.divIcon({ className: 'leaflet-marker-icon' })
}).addTo(map).bindPopup('Утечка данных в Санкт-Петербурге');
const sidebar = document.getElementById('sidebar');
const mainContent = document.getElementById('main-content');
const toggleBtn = document.getElementById('toggle-btn');
toggleBtn.addEventListener('click', () => {
sidebar.classList.toggle('collapsed');
mainContent.classList.toggle('collapsed');
});
</script>
</body>
</html>

241
web/templates/logs.html Normal file
View File

@ -0,0 +1,241 @@
<!DOCTYPE html>
<html lang="ru">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>Логи системы</title>
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/6.0.0-beta3/css/all.min.css">
<style>
body {
margin: 0;
padding: 0;
font-family: 'Arial', sans-serif;
color: #ffffff;
overflow: hidden;
display: flex;
transition: margin-left 0.3s ease;
}
/* Боковое меню */
.sidebar {
width: 250px;
background-color: #2e2e2e;
padding: 20px;
box-shadow: 2px 0 5px rgba(0, 0, 0, 0.3);
transition: width 0.3s ease;
overflow-y: auto;
position: fixed;
top: 0;
left: 0;
bottom: 0;
z-index: 2;
}
.sidebar.collapsed {
width: 60px;
}
.sidebar.collapsed h2,
.sidebar.collapsed ul li a span {
display: none;
}
.sidebar h2 {
color: #3399FF;
text-align: center;
margin-bottom: 20px;
font-size: 24px;
transition: opacity 0.3s ease;
}
.sidebar ul {
list-style: none;
padding: 0;
}
.sidebar ul li {
margin: 15px 0;
}
.sidebar ul li a {
color: #ffffff;
text-decoration: none;
font-size: 16px;
transition: color 0.3s ease;
display: flex;
align-items: center;
}
.sidebar ul li a:hover {
color: #3399FF;
}
.sidebar ul li a i {
margin-right: 10px;
font-size: 20px;
}
.sidebar ul li a span {
transition: opacity 0.3s ease;
}
/* Кнопка для сворачивания/разворачивания меню */
.toggle-btn {
position: fixed;
left: 10px;
top: 10px;
background-color: #3399FF;
border: none;
color: #fff;
padding: 10px;
border-radius: 5px;
cursor: pointer;
z-index: 1000;
}
/* Основной контент */
.container {
flex: 1;
padding: 20px;
margin-left: 250px;
transition: margin-left 0.3s ease;
min-height: 100vh;
display: flex;
justify-content: center;
align-items: center;
}
.container.collapsed {
margin-left: 60px;
}
/* Видео на фоне */
.background-video {
position: fixed;
top: 0;
left: 0;
width: 100%;
height: 100%;
object-fit: cover;
z-index: -1;
opacity: 0.8;
}
/* Затемнение поверх видео */
.overlay {
position: fixed;
top: 0;
left: 0;
width: 100%;
height: 100%;
background: rgba(0, 0, 0, 0.7);
z-index: -1;
}
/* Контейнер для логов */
.logs-container {
width: 90%;
max-width: 800px;
background-color: rgba(46, 46, 46, 0.9);
padding: 20px;
border-radius: 10px;
box-shadow: 0 0 20px rgba(51, 153, 255, 0.5);
}
.logs-container h2 {
color: #3399FF;
margin-bottom: 20px;
font-size: 24px;
}
/* Стили для логов */
.logs {
height: 60vh;
overflow-y: auto;
font-family: 'Courier New', monospace;
font-size: 14px;
color: #3399FF;
padding-right: 10px;
}
.logs p {
margin: 10px 0;
padding: 5px;
background-color: rgba(0, 0, 0, 0.3);
border-radius: 5px;
}
/* Стили для скроллбара */
.logs::-webkit-scrollbar {
width: 8px;
}
.logs::-webkit-scrollbar-track {
background: rgba(0, 0, 0, 0.2);
border-radius: 5px;
}
.logs::-webkit-scrollbar-thumb {
background: #00ffcc;
border-radius: 5px;
}
.logs::-webkit-scrollbar-thumb:hover {
background: #00cc99;
}
</style>
</head>
<body>
<!-- Боковое меню -->
<div class="sidebar" id="sidebar">
<h2>Меню</h2>
<ul>
<li><a href="index.html"><i class="fas fa-home"></i><span>Главная</span></a></li>
<li><a href="#"><i class="fas fa-cog"></i><span>Настройки</span></a></li>
<li><a href="#"><i class="fas fa-chart-line"></i><span>Отчеты</span></a></li>
<li><a href="#"><i class="fas fa-code"></i><span>Парсеры</span></a></li>
<li><a href="logs.html"><i class="fas fa-file-alt"></i><span>Логи</span></a></li>
</ul>
</div>
<!-- Кнопка для сворачивания/разворачивания меню -->
<button class="toggle-btn" id="toggle-btn">
<i class="fas fa-bars"></i>
</button>
<!-- Основной контент -->
<div class="container" id="main-content">
<!-- Видео на фоне -->
<video class="background-video" autoplay loop muted>
<source src="{{ url_for('static', filename='alb_glob0411_1080p_24fps.mp4') }}" type="video/mp4">
Ваш браузер не поддерживает видео.
</video>
<!-- Затемнение поверх видео -->
<div class="overlay"></div>
<!-- Контейнер для логов -->
<div class="logs-container">
<h2>Логи системы (tg_nodes.log)</h2>
<div class="logs" id="logs-container">
{% for log in logs %}
<p>{{ log }}</p>
{% else %}
<p>Нет записей в логах</p>
{% endfor %}
</div>
</div>
</div>
<script>
// Сворачивание/разворачивание бокового меню
const sidebar = document.getElementById('sidebar');
const mainContent = document.getElementById('main-content');
const toggleBtn = document.getElementById('toggle-btn');
toggleBtn.addEventListener('click', () => {
sidebar.classList.toggle('collapsed');
mainContent.classList.toggle('collapsed');
});
</script>
</body>
</html>