feat: celery
This commit is contained in:
parent
166ad0ba2f
commit
44ce626a1b
3
.gitignore
vendored
3
.gitignore
vendored
@ -2,3 +2,6 @@
|
|||||||
*.log
|
*.log
|
||||||
**/__pycache__/
|
**/__pycache__/
|
||||||
/web/static/*.mp4
|
/web/static/*.mp4
|
||||||
|
/celerybeat-schedule.bak
|
||||||
|
/celerybeat-schedule.dat
|
||||||
|
/celerybeat-schedule.dir
|
37
celery_app.py
Normal file
37
celery_app.py
Normal file
@ -0,0 +1,37 @@
|
|||||||
|
from celery import Celery
from kombu import Queue

# Celery application for the leak-monitoring pipeline.
# NOTE(review): the in-memory broker only works while beat and the worker
# live in the same process -- confirm this is intentional (dev setup).
app = Celery(
    'leak_monitor',
    broker='memory://localhost',
    # backend='rpc://',
    include=['tasks.tg_crawler', 'tasks.forum_crawler']
)

app.conf.update(
    worker_pool='solo',
    worker_max_tasks_per_child=100,
    task_serializer='json',
    result_serializer='json',
    accept_content=['json'],
    timezone='Europe/Moscow',
    enable_utc=True,
)

# Dedicated queues so Telegram and forum crawling can be scaled independently.
app.conf.task_queues = (
    Queue('telegram', routing_key='telegram.#'),
    Queue('forum', routing_key='forum.#'),
)

app.conf.beat_schedule = {
    'monitor-telegram-channels': {
        # BUGFIX: was 'tasks.tg_crawler.monitor_channel', which requires
        # (channel, source) arguments and would raise a TypeError every
        # time beat invoked it with no args.  The argument-free dispatcher
        # task is the plural 'monitor_channels', which fans out one
        # monitor_channel task per configured channel.
        'task': 'tasks.tg_crawler.monitor_channels',
        'schedule': 10.0,
        'options': {'queue': 'telegram'}
    },
    'crawl-forum': {
        'task': 'forum_crawler.crawl_forum_task',
        'schedule': 3600.0 * 24,  # once a day (original comment said "hour")
        'args': ('https://thehackernews.com/search/label/data%20breach', [])
    },
}
|
19
celery_beat.py
Normal file
19
celery_beat.py
Normal file
@ -0,0 +1,19 @@
|
|||||||
|
"""Entry point that launches the Celery beat scheduler for leak monitoring."""
from celery_app import app
from utils.logg import LoggerSingleton

logger = LoggerSingleton.get_logger()


def _run_beat():
    """Import the task modules and run the beat scheduler until interrupted."""
    logger.info("Starting Celery beat scheduler...")
    try:
        # Make sure every module listed in `include` is registered before
        # beat starts dispatching schedule entries.
        app.loader.import_default_modules()

        scheduler = app.Beat(
            logfile=None,
            loglevel='info',
            socket_timeout=30
        )
        scheduler.run()
    except Exception as e:
        logger.error(f"Beat failed: {e}")
        raise


if __name__ == '__main__':
    _run_beat()
|
97
dld.py
Normal file
97
dld.py
Normal file
@ -0,0 +1,97 @@
|
|||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import os
|
||||||
|
import psycopg2
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from celery import Celery
|
||||||
|
from utils.logg import LoggerSingleton
|
||||||
|
from driver.driver_creator import DriverCreator
|
||||||
|
from selenium.webdriver.common.by import By
|
||||||
|
from time import sleep
|
||||||
|
from celery_app import app
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
logger = LoggerSingleton.get_logger()
|
||||||
|
|
||||||
|
class ForumCrawler:
    """Scrapes a Hacker News-style listing page for data-leak posts.

    Standalone debug variant of tasks.forum_crawler.ForumCrawler: the DB
    connection is lazy (only opened if _save_to_db is actually used) and
    DB persistence in _crawl_leaks is currently disabled.
    """

    def __init__(self, forum_url):
        self.forum_url = forum_url
        # BUGFIX: db_config was commented out while _connect() still read
        # self.db_config, so any _save_to_db() call died with an
        # AttributeError.  Keep the env-derived config available; the
        # connection itself stays lazy (no _connect() here).
        self.db_config = {
            'host': os.getenv('DB_HOST'),
            'port': os.getenv('DB_PORT'),
            'database': os.getenv('DB_NAME'),
            'user': os.getenv('DB_USER'),
            'password': os.getenv('DB_PASSWORD')
        }
        self.conn = None

    def _connect(self):
        """Open a PostgreSQL connection from the env-derived config; re-raises."""
        try:
            self.conn = psycopg2.connect(**self.db_config)
            logger.info("Database connection established")
        except Exception as e:
            logger.error(f"Database connection error: {e}")
            raise

    def _close(self):
        """Close the DB connection if one is open; errors are logged, not raised."""
        if self.conn and not self.conn.closed:
            try:
                self.conn.close()
                logger.info("Database connection closed")
            except Exception as e:
                logger.error(f"Error closing database connection: {e}")

    def _crawl_leaks(self):
        """Fetch the newest post from the forum page and log it if it looks like a leak."""
        driver_creator = DriverCreator([])
        driver = driver_creator.get_driver()

        try:
            logger.info(f"Starting forum crawl: {self.forum_url}")
            driver.get(self.forum_url)
            sleep(5)  # crude wait for the page to render

            posts = driver.find_elements(By.CSS_SELECTOR, 'a.story-link h2.home-title')
            if not posts:
                logger.info("No posts found on the page")
                return
            last_post = posts[0]
            title = last_post.text.strip()
            link = last_post.get_attribute('href')
            post_content = f"{title} - {link}"

            if 'data breach' in title.lower() or 'leak' in title.lower():
                logger.info(post_content)
                # self._save_to_db('Hacker News', post_content)
                logger.info(f"New leak found: {title} - {link}")
            else:
                logger.info("Last post is not about leaks")

        except Exception as e:
            logger.error(f"Error during forum crawling: {e}")
            raise
        finally:
            driver.quit()
            logger.info("WebDriver session closed")

    def _save_to_db(self, source, message):
        """Insert one leak row into `leaks`; reconnects if the connection is gone."""
        if not self.conn or self.conn.closed:
            self._connect()
        try:
            with self.conn.cursor() as cur:
                cur.execute(
                    "INSERT INTO leaks (resource_name, message) VALUES (%s, %s)",
                    (source, message)
                )
            self.conn.commit()
            logger.info(f"Leak from {source} saved in the database")
        except Exception as e:
            logger.error(f"Error writing to the database: {e}")
|
||||||
|
|
||||||
|
def _main():
    """Run a single crawl of the data-breach label page, always closing the DB."""
    crawler = ForumCrawler('https://thehackernews.com/search/label/data%20breach')
    try:
        crawler._crawl_leaks()
    finally:
        crawler._close()


if __name__ == '__main__':
    _main()
|
@ -27,34 +27,30 @@ class DriverCreator:
|
|||||||
).create_extension()
|
).create_extension()
|
||||||
|
|
||||||
    def get_driver(self):
        '''
        Create and return an undetected-chromedriver Chrome instance.

        NOTE(review): the original (Russian) docstring said this disables JS
        so every request gets "raw" HTML, but all JS-disabling lines (the
        prefs dict and both CDP calls) are commented out below -- the driver
        currently runs with JavaScript enabled and in non-headless mode.
        Confirm which configuration is intended before re-enabling.
        '''
        # extension_path = self._switch_proxy()

        options = uc.ChromeOptions()
        # options.add_argument(f"--load-extension={extension_path}")  # temporary
        # options.add_argument("--headless=new")
        options.add_argument("--disable-gpu")
        options.add_argument("--disable-dev-shm-usage")
        options.add_argument("--no-sandbox")
        # options.add_argument("--disable-webgl")
        # options.add_argument("--disable-software-rasterizer")
        # options.add_argument("--disable-extensions")

        # prefs = {"profile.managed_default_content_settings.javascript": 2}
        # options.experimental_options["prefs"] = prefs

        # version_main pins the driver to the installed Chrome major version.
        driver = uc.Chrome(
            options=options,
            version_main=135,
            # user_multi_procs=True
        )

        # driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
        #     "source": "Object.defineProperty(navigator, 'javaEnabled', {get: () => false});"
        # })
        # driver.execute_cdp_cmd("Emulation.setScriptExecutionDisabled", {"value": True})

        return driver
|
28
initiator.py
Normal file
28
initiator.py
Normal file
@ -0,0 +1,28 @@
|
|||||||
|
from celery_app import app
|
||||||
|
from utils.logg import LoggerSingleton
|
||||||
|
|
||||||
|
logger = LoggerSingleton.get_logger()
|
||||||
|
|
||||||
|
# The broker is the central hub that:
#   - accepts tasks from beat (the scheduler),
#   - distributes them across the queues (telegram, forum),
#   - hands tasks to the workers subscribed to those queues.
|
||||||
|
|
||||||
|
def main():
    """Launch a Celery worker bound to the 'telegram' queue (solo pool).

    Heartbeat and gossip are disabled since this is a single-node setup.
    Startup failures are logged rather than propagated.
    """
    worker_argv = [
        'worker',
        '--loglevel=debug',
        '--pool=solo',
        '-Q', 'telegram',
        '--without-heartbeat',
        '--without-gossip'
    ]
    try:
        logger.info("Starting Celery worker...")
        app.worker_main(argv=worker_argv)
    except Exception as e:
        logger.error(f"Failed to start worker: {e}")


if __name__ == '__main__':
    main()
|
@ -9,3 +9,7 @@ telethon
|
|||||||
schedule
|
schedule
|
||||||
psycopg2-binary
|
psycopg2-binary
|
||||||
docker
|
docker
|
||||||
|
asyncio
|
||||||
|
pytz
|
||||||
|
flask
|
||||||
|
apscheduler
|
0
tasks/__init__.py
Normal file
0
tasks/__init__.py
Normal file
97
tasks/forum_crawler.py
Normal file
97
tasks/forum_crawler.py
Normal file
@ -0,0 +1,97 @@
|
|||||||
|
from bs4 import BeautifulSoup
|
||||||
|
import os
|
||||||
|
import psycopg2
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from celery import Celery
|
||||||
|
from utils.logg import LoggerSingleton
|
||||||
|
from driver.driver_creator import DriverCreator
|
||||||
|
from selenium.webdriver.common.by import By
|
||||||
|
from time import sleep
|
||||||
|
from celery_app import app
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
logger = LoggerSingleton.get_logger()
|
||||||
|
|
||||||
|
class ForumCrawler:
    """Crawls a forum/news listing page and stores leak-related posts in Postgres."""

    def __init__(self, forum_url, proxy_list):
        self.forum_url = forum_url
        self.proxy_list = proxy_list
        # Connection settings come from the environment (.env via load_dotenv()).
        self.db_config = {
            'host': os.getenv('DB_HOST'),
            'port': os.getenv('DB_PORT'),
            'database': os.getenv('DB_NAME'),
            'user': os.getenv('DB_USER'),
            'password': os.getenv('DB_PASSWORD')
        }
        self.conn = None
        self._connect()

    def _connect(self):
        """Open the PostgreSQL connection; re-raises so callers see failures."""
        try:
            self.conn = psycopg2.connect(**self.db_config)
            logger.info("Database connection established")
        except Exception as e:
            logger.error(f"Database connection error: {e}")
            raise

    def _close(self):
        """Close the DB connection if open; errors are logged, not raised."""
        if self.conn and not self.conn.closed:
            try:
                self.conn.close()
                logger.info("Database connection closed")
            except Exception as e:
                logger.error(f"Error closing database connection: {e}")

    def _crawl_leaks(self):
        """Fetch the newest post and persist it if the title looks leak-related."""
        driver_creator = DriverCreator(self.proxy_list)
        driver = driver_creator.get_driver()

        try:
            logger.info(f"Starting forum crawl: {self.forum_url}")
            driver.get(self.forum_url)
            sleep(2)  # crude wait for the page to render

            posts = driver.find_elements(By.CSS_SELECTOR, 'a.story-link h2.home-title')
            if not posts:
                logger.info("No posts found on the page")
                return
            last_post = posts[0]
            title = last_post.text.strip()
            link = last_post.get_attribute('href')
            post_content = f"{title} - {link}"

            if 'data breach' in title.lower() or 'leak' in title.lower():
                self._save_to_db('Hacker News', post_content)
                logger.info(f"New leak found: {title} - {link}")
            else:
                logger.info("Last post is not about leaks")

        except Exception as e:
            logger.error(f"Error during forum crawling: {e}")
            raise
        finally:
            driver.quit()
            logger.info("WebDriver session closed")

    def _save_to_db(self, source, message):
        """Insert one leak row into `leaks`; reconnects if the connection is gone."""
        if not self.conn or self.conn.closed:
            self._connect()
        try:
            with self.conn.cursor() as cur:
                cur.execute(
                    "INSERT INTO leaks (resource_name, message) VALUES (%s, %s)",
                    (source, message)
                )
            self.conn.commit()
            logger.info(f"Leak from {source} saved in the database")
        except Exception as e:
            # CONSISTENCY/BUGFIX: roll back the failed transaction like
            # TelegramMonitor._save_to_db does; without this, psycopg2 leaves
            # the connection in an aborted state and every later INSERT on it
            # fails until rollback.
            self.conn.rollback()
            logger.error(f"Error writing to the database: {e}")
|
||||||
|
|
||||||
|
@app.task(name='forum_crawler.crawl_forum_task')
def crawl_forum_task(forum_url, proxy_list):
    """Celery entry point: crawl `forum_url` once, always releasing the DB connection."""
    worker = ForumCrawler(forum_url, proxy_list)
    try:
        worker._crawl_leaks()
    finally:
        worker._close()
|
121
tasks/tg_crawler.py
Normal file
121
tasks/tg_crawler.py
Normal file
@ -0,0 +1,121 @@
|
|||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import asyncio
|
||||||
|
import psycopg2
|
||||||
|
from datetime import datetime
|
||||||
|
from telethon.sync import TelegramClient
|
||||||
|
from telethon.tl.functions.messages import GetHistoryRequest
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
from celery import Celery
|
||||||
|
from utils.logg import LoggerSingleton
|
||||||
|
from celery_app import app
|
||||||
|
|
||||||
|
load_dotenv()
|
||||||
|
|
||||||
|
logger = LoggerSingleton.get_logger()
|
||||||
|
|
||||||
|
class TelegramMonitor:
    """Fetches the newest post from a Telegram channel and stores it in Postgres."""

    def __init__(self):
        # Connection settings come from the environment (.env via load_dotenv()).
        self.db_config = {
            'host': os.getenv('DB_HOST'),
            'port': os.getenv('DB_PORT'),
            'database': os.getenv('DB_NAME'),
            'user': os.getenv('DB_USER'),
            'password': os.getenv('DB_PASSWORD')
        }
        self.conn = None
        self._connect()

    def _connect(self):
        """Open the PostgreSQL connection; re-raises so callers see failures."""
        try:
            self.conn = psycopg2.connect(**self.db_config)
            logger.info("Database connection established")
        except Exception as e:
            logger.error(f"Database connection error: {e}")
            raise

    def _close(self):
        """Close the DB connection if open; errors are logged, not raised."""
        if self.conn and not self.conn.closed:
            try:
                self.conn.close()
                logger.info("Database connection closed")
            except Exception as e:
                logger.error(f"Error closing database connection: {e}")

    def _fetch_post(self, channel_username, source_name):
        """Fetch the most recent message of `channel_username` and persist it.

        Opens a fresh Telethon client per call (API_ID/API_HASH from the
        environment) and re-raises on failure so the Celery task can retry.
        NOTE(review): all calls share the 'session' file -- confirm that
        concurrent tasks never run with a pool other than 'solo'.
        """
        try:
            with TelegramClient('session', os.getenv('API_ID'), os.getenv('API_HASH')) as client:
                entity = client.get_entity(channel_username)
                # limit=1 with zero offsets returns only the latest message.
                history = client(GetHistoryRequest(
                    peer=entity,
                    limit=1,
                    offset_date=None,
                    offset_id=0,
                    max_id=0,
                    min_id=0,
                    add_offset=0,
                    hash=0
                ))
                if history.messages:
                    self._save_to_db(source_name, history.messages[0].message)
        except Exception as e:
            logger.error(f"Error fetching post from {source_name}: {e}")
            raise

    def _save_to_db(self, source, message):
        """Insert one row into `leaks`; rolls back and re-raises on failure."""
        if not self.conn or self.conn.closed:
            self._connect()

        try:
            with self.conn.cursor() as cur:
                cur.execute(
                    "INSERT INTO leaks (resource_name, message) VALUES (%s, %s)",
                    (source, message)
                )
            self.conn.commit()
            logger.info(f"Data from {source} saved successfully")
        except Exception as e:
            logger.error(f"Database save error for {source}: {e}")
            self.conn.rollback()
            raise
|
||||||
|
|
||||||
|
@app.task(bind=True, name='tasks.tg_crawler.monitor_channels')
def monitor_channels(self):
    """Dispatcher task: fan out one monitor_channel task per configured channel."""
    # (channel username, source label) pairs to monitor.
    targets = [
        ('trueosint', 'trueosint'),
        ('dataleak', 'dataleak'),
        ('Vaultofdataleaksss', 'Vaultofdataleaksss')
    ]
    # Broker-publish retry policy shared by every dispatched task.
    publish_retry_policy = {
        'max_retries': 3,
        'interval_start': 2,
        'interval_step': 5,
        'interval_max': 20
    }

    logger.info("Starting Telegram channels monitoring")

    for username, label in targets:
        try:
            monitor_channel.apply_async(
                args=(username, label),
                queue='telegram',
                retry=True,
                retry_policy=publish_retry_policy
            )
            logger.info(f"Sent task for channel: {username}")
        except Exception as e:
            logger.error(f"Failed to send task for {username}: {e}")
            # Retry the whole dispatch task if publishing fails.
            raise self.retry(exc=e)
|
||||||
|
|
||||||
|
@app.task(bind=True, name='tasks.tg_crawler.monitor_channel')
def monitor_channel(self, channel, source):
    """Fetch and persist the latest post of one channel; retries on failure."""
    logger.info(f"Starting monitoring channel: {channel}")
    tg_monitor = TelegramMonitor()
    try:
        tg_monitor._fetch_post(channel, source)
        logger.info(f"Successfully monitored channel: {channel}")
    except Exception as e:
        logger.error(f"Channel monitoring failed for {channel}: {e}")
        raise self.retry(exc=e)
    finally:
        # Always release the DB connection held by the monitor instance.
        tg_monitor._close()
|
@ -1,77 +0,0 @@
|
|||||||
import logging
|
|
||||||
import os
|
|
||||||
import psycopg2
|
|
||||||
from datetime import datetime
|
|
||||||
from telethon.sync import TelegramClient
|
|
||||||
from telethon.tl.functions.messages import GetHistoryRequest
|
|
||||||
|
|
||||||
LOG_FILE = os.getenv('LOG_FILE', '/app/tg_nodes.log')
|
|
||||||
|
|
||||||
if os.path.exists(LOG_FILE) and os.path.isdir(LOG_FILE):
|
|
||||||
raise RuntimeError(f"Path {LOG_FILE} is a directory! Expected file.")
|
|
||||||
|
|
||||||
logging.basicConfig(
|
|
||||||
handlers=[
|
|
||||||
logging.FileHandler(LOG_FILE),
|
|
||||||
logging.StreamHandler()
|
|
||||||
]
|
|
||||||
)
|
|
||||||
|
|
||||||
class TelegramChannelMonitor:
|
|
||||||
db_config = None
|
|
||||||
|
|
||||||
def __init__(self, session_name, api_id, api_hash, channel_username, source_name):
|
|
||||||
self.session_name = session_name
|
|
||||||
self.api_id = api_id
|
|
||||||
self.api_hash = api_hash
|
|
||||||
self.channel_username = channel_username
|
|
||||||
self.source_name = source_name
|
|
||||||
|
|
||||||
@classmethod
|
|
||||||
def set_db_config(cls, config):
|
|
||||||
cls.db_config = config
|
|
||||||
|
|
||||||
def fetch_last_post(self):
|
|
||||||
logging.info(f"[{self.source_name}] checking a new post...")
|
|
||||||
|
|
||||||
try:
|
|
||||||
with TelegramClient(self.session_name, self.api_id, self.api_hash) as client:
|
|
||||||
entity = client.get_entity(self.channel_username)
|
|
||||||
history = client(GetHistoryRequest(
|
|
||||||
peer=entity,
|
|
||||||
limit=1,
|
|
||||||
offset_date=None,
|
|
||||||
offset_id=0,
|
|
||||||
max_id=0,
|
|
||||||
min_id=0,
|
|
||||||
add_offset=0,
|
|
||||||
hash=0
|
|
||||||
))
|
|
||||||
|
|
||||||
if history.messages:
|
|
||||||
msg = history.messages[0]
|
|
||||||
logging.info(f"[{self.source_name}] received a post: {msg.message[:60]}...")
|
|
||||||
self.save_to_db(self.source_name, msg.message)
|
|
||||||
else:
|
|
||||||
logging.info(f"[{self.source_name}] there is no new messages")
|
|
||||||
except Exception as e:
|
|
||||||
logging.error(f"[{self.source_name}] error when receiving a post: {e}")
|
|
||||||
|
|
||||||
def save_to_db(self, source, message):
|
|
||||||
if not self.db_config:
|
|
||||||
logging.error("DB config is not set")
|
|
||||||
return
|
|
||||||
|
|
||||||
try:
|
|
||||||
conn = psycopg2.connect(**self.db_config)
|
|
||||||
cur = conn.cursor()
|
|
||||||
cur.execute(
|
|
||||||
"INSERT INTO leaks (source, message) VALUES (%s, %s)",
|
|
||||||
(source, message)
|
|
||||||
)
|
|
||||||
conn.commit()
|
|
||||||
cur.close()
|
|
||||||
conn.close()
|
|
||||||
logging.info(f"[{self.source_name}] message is recorded in the database")
|
|
||||||
except Exception as e:
|
|
||||||
logging.error(f"[{self.source_name}] error when writing to the database: {e}")
|
|
@ -1,47 +0,0 @@
|
|||||||
import asyncio
|
|
||||||
import os
|
|
||||||
from apscheduler.schedulers.background import BackgroundScheduler
|
|
||||||
from pytz import timezone
|
|
||||||
from tg_crawler import TelegramChannelMonitor
|
|
||||||
from dotenv import load_dotenv
|
|
||||||
import logging
|
|
||||||
|
|
||||||
load_dotenv()
|
|
||||||
|
|
||||||
def main():
|
|
||||||
TelegramChannelMonitor.set_db_config({
|
|
||||||
'host': os.getenv("HOST"),
|
|
||||||
'port': os.getenv("PORT"),
|
|
||||||
'database': os.getenv("DBNAME"),
|
|
||||||
'user': os.getenv("USER"),
|
|
||||||
'password': os.getenv("PASSWORD")
|
|
||||||
})
|
|
||||||
|
|
||||||
monitor = TelegramChannelMonitor(
|
|
||||||
session_name='session_trueosint',
|
|
||||||
api_id=os.getenv("TELETHON_API_ID"),
|
|
||||||
api_hash=os.getenv("TELETHON_API_HASH"),
|
|
||||||
channel_username='trueosint',
|
|
||||||
source_name='trueosint'
|
|
||||||
)
|
|
||||||
|
|
||||||
scheduler = BackgroundScheduler()
|
|
||||||
scheduler.add_job(
|
|
||||||
monitor.fetch_last_post,
|
|
||||||
'cron',
|
|
||||||
hour=9,
|
|
||||||
minute=0,
|
|
||||||
timezone=timezone("Europe/Moscow")
|
|
||||||
)
|
|
||||||
|
|
||||||
try:
|
|
||||||
scheduler.start()
|
|
||||||
logging.info("Scheduler started successfully")
|
|
||||||
while True:
|
|
||||||
pass
|
|
||||||
except (KeyboardInterrupt, SystemExit):
|
|
||||||
scheduler.shutdown()
|
|
||||||
logging.info("Scheduler shut down successfully")
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
main()
|
|
@ -1,48 +0,0 @@
|
|||||||
import asyncio
|
|
||||||
import os
|
|
||||||
from apscheduler.schedulers.background import BackgroundScheduler
|
|
||||||
from pytz import timezone
|
|
||||||
from tg_crawler import TelegramChannelMonitor
|
|
||||||
from dotenv import load_dotenv
|
|
||||||
import logging
|
|
||||||
|
|
||||||
|
|
||||||
load_dotenv()
|
|
||||||
|
|
||||||
def main():
|
|
||||||
TelegramChannelMonitor.set_db_config({
|
|
||||||
'host': os.getenv("HOST"),
|
|
||||||
'port': os.getenv("PORT"),
|
|
||||||
'database': os.getenv("DBNAME"),
|
|
||||||
'user': os.getenv("USER"),
|
|
||||||
'password': os.getenv("PASSWORD")
|
|
||||||
})
|
|
||||||
|
|
||||||
monitor = TelegramChannelMonitor(
|
|
||||||
session_name='session_trueosint',
|
|
||||||
api_id=os.getenv("TELETHON_API_ID"),
|
|
||||||
api_hash=os.getenv("TELETHON_API_HASH"),
|
|
||||||
channel_username='dataleak',
|
|
||||||
source_name='dataleak'
|
|
||||||
)
|
|
||||||
|
|
||||||
scheduler = BackgroundScheduler()
|
|
||||||
scheduler.add_job(
|
|
||||||
monitor.fetch_last_post,
|
|
||||||
'cron',
|
|
||||||
hour=9,
|
|
||||||
minute=0,
|
|
||||||
timezone=timezone("Europe/Moscow")
|
|
||||||
)
|
|
||||||
|
|
||||||
try:
|
|
||||||
scheduler.start()
|
|
||||||
logging.info("Scheduler started successfully")
|
|
||||||
while True:
|
|
||||||
pass
|
|
||||||
except (KeyboardInterrupt, SystemExit):
|
|
||||||
scheduler.shutdown()
|
|
||||||
logging.info("Scheduler shut down successfully")
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
|
||||||
main()
|
|
27
utils/logg.py
Normal file
27
utils/logg.py
Normal file
@ -0,0 +1,27 @@
|
|||||||
|
import logging
|
||||||
|
|
||||||
|
class LoggerSingleton:
    """Lazily builds and caches a single shared logger instance."""

    _logger = None  # cached logger; created on the first get_logger() call

    @staticmethod
    def get_logger():
        """Return the shared logger, configuring it on first use.

        The logger emits at INFO level to both the console and 'tempora.log'
        (UTF-8), with a timestamped "name - level - message" format.
        """
        if LoggerSingleton._logger is not None:
            return LoggerSingleton._logger

        log = logging.getLogger(__name__)
        log.setLevel(logging.INFO)

        fmt = logging.Formatter(
            "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
            datefmt="%Y-%m-%d %H:%M:%S"
        )
        for handler in (
            logging.StreamHandler(),
            logging.FileHandler('tempora.log', encoding='utf-8'),
        ):
            handler.setFormatter(fmt)
            log.addHandler(handler)

        LoggerSingleton._logger = log
        return log
|
@ -102,6 +102,7 @@ def get_leaks_stats():
|
|||||||
conn.close()
|
conn.close()
|
||||||
|
|
||||||
@app.route("/")
|
@app.route("/")
|
||||||
|
@app.route("/index.html")
|
||||||
def index():
|
def index():
|
||||||
parser_status = get_parser_status()
|
parser_status = get_parser_status()
|
||||||
leaks_stats = get_leaks_stats()
|
leaks_stats = get_leaks_stats()
|
||||||
@ -120,6 +121,7 @@ def index():
|
|||||||
leaks_stats=leaks_stats
|
leaks_stats=leaks_stats
|
||||||
)
|
)
|
||||||
|
|
||||||
|
@app.route("/logs.html")
|
||||||
@app.route("/logs")
|
@app.route("/logs")
|
||||||
def logs():
|
def logs():
|
||||||
log_path = '/app/tg_nodes.log'
|
log_path = '/app/tg_nodes.log'
|
||||||
|
@ -99,8 +99,10 @@
|
|||||||
padding: 20px;
|
padding: 20px;
|
||||||
margin-left: 250px;
|
margin-left: 250px;
|
||||||
transition: margin-left 0.3s ease;
|
transition: margin-left 0.3s ease;
|
||||||
position: relative;
|
min-height: 100vh;
|
||||||
z-index: 1;
|
display: flex;
|
||||||
|
justify-content: center;
|
||||||
|
align-items: center;
|
||||||
}
|
}
|
||||||
|
|
||||||
.container.collapsed {
|
.container.collapsed {
|
||||||
@ -116,7 +118,7 @@
|
|||||||
height: 100%;
|
height: 100%;
|
||||||
object-fit: cover;
|
object-fit: cover;
|
||||||
z-index: -1;
|
z-index: -1;
|
||||||
opacity: 0.8; /* Полупрозрачность видео */
|
opacity: 0.8;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Затемнение поверх видео */
|
/* Затемнение поверх видео */
|
||||||
@ -126,23 +128,18 @@
|
|||||||
left: 0;
|
left: 0;
|
||||||
width: 100%;
|
width: 100%;
|
||||||
height: 100%;
|
height: 100%;
|
||||||
background: rgba(0, 0, 0, 0.7); /* Черный полупрозрачный слой */
|
background: rgba(0, 0, 0, 0.7);
|
||||||
z-index: -1;
|
z-index: -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
/* Контейнер для логов */
|
/* Контейнер для логов */
|
||||||
.logs-container {
|
.logs-container {
|
||||||
position: absolute;
|
width: 90%;
|
||||||
top: 50%;
|
max-width: 800px;
|
||||||
left: 20px;
|
background-color: rgba(46, 46, 46, 0.9);
|
||||||
transform: translateY(-50%);
|
|
||||||
width: 40%;
|
|
||||||
max-width: 600px;
|
|
||||||
background-color: rgba(46, 46, 46, 0.9); /* Полупрозрачный фон */
|
|
||||||
padding: 20px;
|
padding: 20px;
|
||||||
border-radius: 10px;
|
border-radius: 10px;
|
||||||
box-shadow: 0 0 20px rgba(51, 153, 255, 0.5);
|
box-shadow: 0 0 20px rgba(51, 153, 255, 0.5);
|
||||||
z-index: 1;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
.logs-container h2 {
|
.logs-container h2 {
|
||||||
|
Loading…
Reference in New Issue
Block a user