feat: celery
This commit is contained in:
		
							parent
							
								
									166ad0ba2f
								
							
						
					
					
						commit
						44ce626a1b
					
				
							
								
								
									
										3
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										3
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @ -2,3 +2,6 @@ | |||||||
| *.log | *.log | ||||||
| **/__pycache__/ | **/__pycache__/ | ||||||
| /web/static/*.mp4 | /web/static/*.mp4 | ||||||
|  | /celerybeat-schedule.bak | ||||||
|  | /celerybeat-schedule.dat | ||||||
|  | /celerybeat-schedule.dir | ||||||
							
								
								
									
										37
									
								
								celery_app.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										37
									
								
								celery_app.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,37 @@ | |||||||
|  | from celery import Celery | ||||||
|  | from kombu import Queue | ||||||
|  | 
 | ||||||
# Celery application for the leak monitor: one app instance shared by the
# beat scheduler and the workers.
app = Celery(
    'leak_monitor',
    # NOTE(review): the memory:// broker is process-local — beat and workers
    # run in separate processes, so tasks published here never reach a
    # worker.  Use a real broker (redis:// or amqp://) for deployment.
    broker='memory://localhost',
    # backend='rpc://',
    include=['tasks.tg_crawler', 'tasks.forum_crawler']
)

app.conf.update(
    worker_pool='solo',
    worker_max_tasks_per_child=100,
    task_serializer='json',
    result_serializer='json',
    accept_content=['json'],
    timezone='Europe/Moscow',
    enable_utc=True,
)

# Dedicated queues so Telegram and forum crawls can be consumed by
# different workers (initiator.py subscribes to 'telegram' via -Q).
app.conf.task_queues = (
    Queue('telegram', routing_key='telegram.#'),
    Queue('forum', routing_key='forum.#'),
)

app.conf.beat_schedule = {
    'monitor-telegram-channels': {
        # Fixed: schedule the argument-less dispatcher task.  The previous
        # entry targeted 'tasks.tg_crawler.monitor_channel', which is bound
        # and requires (channel, source) args — beat sends none, so every
        # tick would fail with a missing-argument error.
        'task': 'tasks.tg_crawler.monitor_channels',
        'schedule': 10.0,
        'options': {'queue': 'telegram'}
    },
    'crawl-forum': {
        'task': 'forum_crawler.crawl_forum_task',
        'schedule': 3600.0 * 24,   # once a day (24 h), not hourly
        'args': ('https://thehackernews.com/search/label/data%20breach', []),
        # Fixed: without an explicit queue this task lands on the default
        # 'celery' queue, which no declared worker consumes.
        'options': {'queue': 'forum'}
    },
}
							
								
								
									
										19
									
								
								celery_beat.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										19
									
								
								celery_beat.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,19 @@ | |||||||
|  | from celery_app import app | ||||||
|  | from utils.logg import LoggerSingleton | ||||||
|  | 
 | ||||||
|  | logger = LoggerSingleton.get_logger() | ||||||
|  | 
 | ||||||
if __name__ == '__main__':
    logger.info("Starting Celery beat scheduler...")
    try:
        # Register the task modules listed in the app's `include` before the
        # scheduler starts dispatching entries from beat_schedule.
        app.loader.import_default_modules()

        scheduler = app.Beat(
            logfile=None,
            loglevel='info',
            socket_timeout=30,
        )
        scheduler.run()
    except Exception as e:
        logger.error(f"Beat failed: {e}")
        raise
							
								
								
									
										97
									
								
								dld.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										97
									
								
								dld.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,97 @@ | |||||||
|  | from bs4 import BeautifulSoup | ||||||
|  | import os | ||||||
|  | import psycopg2 | ||||||
|  | from dotenv import load_dotenv | ||||||
|  | from celery import Celery | ||||||
|  | from utils.logg import LoggerSingleton | ||||||
|  | from driver.driver_creator import DriverCreator | ||||||
|  | from selenium.webdriver.common.by import By | ||||||
|  | from time import sleep | ||||||
|  | from celery_app import app | ||||||
|  | 
 | ||||||
|  | load_dotenv() | ||||||
|  | 
 | ||||||
|  | logger = LoggerSingleton.get_logger() | ||||||
|  | 
 | ||||||
class ForumCrawler:
    """Scrape the newest post of a news page and persist leak-related hits.

    Standalone development variant of ``tasks.forum_crawler.ForumCrawler``:
    the database connection is opened lazily (only when a leak is saved).
    """

    def __init__(self, forum_url):
        self.forum_url = forum_url
        # self.proxy_list = proxy_list
        # Fixed: the original left db_config undefined (commented out), so
        # _connect() would raise AttributeError whenever _save_to_db() was
        # re-enabled.  Credentials come from the environment (.env is loaded
        # at module import time).
        self.db_config = {
            'host': os.getenv('DB_HOST'),
            'port': os.getenv('DB_PORT'),
            'database': os.getenv('DB_NAME'),
            'user': os.getenv('DB_USER'),
            'password': os.getenv('DB_PASSWORD')
        }
        self.conn = None
        # self._connect()  # deferred: _save_to_db() connects on first use

    def _connect(self):
        """Open the psycopg2 connection; logs and re-raises on failure."""
        try:
            self.conn = psycopg2.connect(**self.db_config)
            logger.info("Database connection established")
        except Exception as e:
            logger.error(f"Database connection error: {e}")
            raise

    def _close(self):
        """Close the connection if it is open; logs instead of raising."""
        if self.conn and not self.conn.closed:
            try:
                self.conn.close()
                logger.info("Database connection closed")
            except Exception as e:
                logger.error(f"Error closing database connection: {e}")

    def _crawl_leaks(self):
        """Load the page, inspect the newest post, log/save it if it is a leak."""
        driver_creator = DriverCreator([])
        driver = driver_creator.get_driver()

        try:
            logger.info(f"Starting forum crawl: {self.forum_url}")
            driver.get(self.forum_url)
            sleep(5)  # crude wait for the page to render

            posts = driver.find_elements(By.CSS_SELECTOR, 'a.story-link h2.home-title')
            if not posts:
                logger.info("No posts found on the page")
                return
            last_post = posts[0]
            title = last_post.text.strip()
            link = last_post.get_attribute('href')
            post_content = f"{title} - {link}"

            if 'data breach' in title.lower() or 'leak' in title.lower():
                logger.info(post_content)
                # self._save_to_db('Hacker News', post_content)
                logger.info(f"New leak found: {title} - {link}")
            else:
                logger.info("Last post is not about leaks")

        except Exception as e:
            logger.error(f"Error during forum crawling: {e}")
            raise
        finally:
            driver.quit()
            logger.info("WebDriver session closed")

    def _save_to_db(self, source, message):
        """Insert one leak row, reconnecting lazily if needed.

        Fixed: roll back on failure so the connection is not left in an
        aborted-transaction state (matches the tasks/ variant's behavior).
        """
        if not self.conn or self.conn.closed:
            self._connect()
        try:
            with self.conn.cursor() as cur:
                cur.execute(
                    "INSERT INTO leaks (resource_name, message) VALUES (%s, %s)",
                    (source, message)
                )
            self.conn.commit()
            logger.info(f"Leak from {source} saved in the database")
        except Exception as e:
            logger.error(f"Error writing to the database: {e}")
            self.conn.rollback()
|  | 
 | ||||||
if __name__ == '__main__':
    # One-shot manual run against the Hacker News "data breach" label page.
    job = ForumCrawler('https://thehackernews.com/search/label/data%20breach')
    try:
        job._crawl_leaks()
    finally:
        job._close()
| @ -27,34 +27,30 @@ class DriverCreator: | |||||||
|         ).create_extension() |         ).create_extension() | ||||||
| 
 | 
 | ||||||
    def get_driver(self):
        """Create and return an undetected-chromedriver Chrome instance.

        JS-disabling flags and CDP overrides from the earlier version are
        kept commented out; the browser now runs with JavaScript enabled
        and (per the commented headless flag) in a visible window.
        """
        # extension_path = self._switch_proxy()

        options = uc.ChromeOptions()
        # options.add_argument(f"--load-extension={extension_path}") # временно
        # options.add_argument("--headless=new")
        options.add_argument("--disable-gpu")
        options.add_argument("--disable-dev-shm-usage")
        options.add_argument("--no-sandbox")
        # options.add_argument("--disable-webgl")
        # options.add_argument("--disable-software-rasterizer")
        # options.add_argument("--disable-extensions")

        # prefs = {"profile.managed_default_content_settings.javascript": 2}
        # options.experimental_options["prefs"] = prefs

        driver = uc.Chrome(
            options=options,
            # NOTE(review): hard-codes Chrome major version 135 — breaks when
            # the installed browser updates; confirm whether auto-detection
            # (omitting version_main) is viable here.
            version_main=135,
            # user_multi_procs=True
        )

        # driver.execute_cdp_cmd("Page.addScriptToEvaluateOnNewDocument", {
        #     "source": "Object.defineProperty(navigator, 'javaEnabled', {get: () => false});"
        # })
        # driver.execute_cdp_cmd("Emulation.setScriptExecutionDisabled", {"value": True})

        return driver
							
								
								
									
										28
									
								
								initiator.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										28
									
								
								initiator.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,28 @@ | |||||||
|  | from celery_app import app | ||||||
|  | from utils.logg import LoggerSingleton | ||||||
|  | 
 | ||||||
|  | logger = LoggerSingleton.get_logger() | ||||||
|  | 
 | ||||||
|  | # Брокер центральный узел, который: | ||||||
|  | # Принимает задачи от beat (планировщика) | ||||||
|  | # Распределяет их по очередям (telegram, forum). | ||||||
|  | # Передаёт задачи воркерам, которые подписаны на эти очереди. | ||||||
|  | 
 | ||||||
def main():
    """Start an in-process Celery worker bound to the 'telegram' queue."""
    worker_argv = [
        'worker',
        '--loglevel=debug',
        '--pool=solo',
        '-Q', 'telegram',
        # Lighter-weight worker: skip heartbeat/gossip coordination traffic.
        '--without-heartbeat',
        '--without-gossip',
    ]
    try:
        logger.info("Starting Celery worker...")
        app.worker_main(argv=worker_argv)
    except Exception as e:
        logger.error(f"Failed to start worker: {e}")


if __name__ == '__main__':
    main()
| @ -9,3 +9,7 @@ telethon | |||||||
| schedule | schedule | ||||||
| psycopg2-binary | psycopg2-binary | ||||||
| docker | docker | ||||||
|  | # asyncio is part of the Python 3 standard library; the PyPI "asyncio" package is an obsolete Python 2 shim and must not be installed | ||||||
|  | pytz | ||||||
|  | flask | ||||||
|  | apscheduler | ||||||
							
								
								
									
										0
									
								
								tasks/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										0
									
								
								tasks/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
								
								
									
										97
									
								
								tasks/forum_crawler.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										97
									
								
								tasks/forum_crawler.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,97 @@ | |||||||
|  | from bs4 import BeautifulSoup | ||||||
|  | import os | ||||||
|  | import psycopg2 | ||||||
|  | from dotenv import load_dotenv | ||||||
|  | from celery import Celery | ||||||
|  | from utils.logg import LoggerSingleton | ||||||
|  | from driver.driver_creator import DriverCreator | ||||||
|  | from selenium.webdriver.common.by import By | ||||||
|  | from time import sleep | ||||||
|  | from celery_app import app | ||||||
|  | 
 | ||||||
|  | load_dotenv() | ||||||
|  | 
 | ||||||
|  | logger = LoggerSingleton.get_logger() | ||||||
|  | 
 | ||||||
class ForumCrawler:
    """Crawl a news page for leak-related posts and store hits in Postgres.

    Database credentials are read from the environment (.env is loaded at
    module import time); the connection is opened eagerly in __init__.
    """

    def __init__(self, forum_url, proxy_list):
        self.forum_url = forum_url
        self.proxy_list = proxy_list
        self.db_config = {
            'host': os.getenv('DB_HOST'),
            'port': os.getenv('DB_PORT'),
            'database': os.getenv('DB_NAME'),
            'user': os.getenv('DB_USER'),
            'password': os.getenv('DB_PASSWORD')
        }
        self.conn = None
        self._connect()

    def _connect(self):
        """Open the psycopg2 connection; logs and re-raises on failure."""
        try:
            self.conn = psycopg2.connect(**self.db_config)
            logger.info("Database connection established")
        except Exception as e:
            logger.error(f"Database connection error: {e}")
            raise

    def _close(self):
        """Close the connection if it is open; logs instead of raising."""
        if self.conn and not self.conn.closed:
            try:
                self.conn.close()
                logger.info("Database connection closed")
            except Exception as e:
                logger.error(f"Error closing database connection: {e}")

    def _crawl_leaks(self):
        """Load the page, inspect the newest post, save it if it is a leak.

        Always quits the WebDriver session, even on failure.
        """
        driver_creator = DriverCreator(self.proxy_list)
        driver = driver_creator.get_driver()

        try:
            logger.info(f"Starting forum crawl: {self.forum_url}")
            driver.get(self.forum_url)
            sleep(2)  # crude wait for the page to render

            posts = driver.find_elements(By.CSS_SELECTOR, 'a.story-link h2.home-title')
            if not posts:
                logger.info("No posts found on the page")
                return
            last_post = posts[0]
            title = last_post.text.strip()
            link = last_post.get_attribute('href')
            post_content = f"{title} - {link}"

            if 'data breach' in title.lower() or 'leak' in title.lower():
                self._save_to_db('Hacker News', post_content)
                logger.info(f"New leak found: {title} - {link}")
            else:
                logger.info("Last post is not about leaks")

        except Exception as e:
            logger.error(f"Error during forum crawling: {e}")
            raise
        finally:
            driver.quit()
            logger.info("WebDriver session closed")

    def _save_to_db(self, source, message):
        """Insert one leak row, reconnecting lazily if needed.

        Fixed: roll back the failed transaction so the connection is not
        left in an aborted state that would reject every later statement
        (consistent with TelegramMonitor._save_to_db).  The error is still
        swallowed after logging, as before.
        """
        if not self.conn or self.conn.closed:
            self._connect()
        try:
            with self.conn.cursor() as cur:
                cur.execute(
                    "INSERT INTO leaks (resource_name, message) VALUES (%s, %s)",
                    (source, message)
                )
            self.conn.commit()
            logger.info(f"Leak from {source} saved in the database")
        except Exception as e:
            logger.error(f"Error writing to the database: {e}")
            self.conn.rollback()
|  | 
 | ||||||
@app.task(name='forum_crawler.crawl_forum_task')
def crawl_forum_task(forum_url, proxy_list):
    """Celery entry point: crawl `forum_url` once, closing the DB connection after."""
    job = ForumCrawler(forum_url, proxy_list)
    try:
        job._crawl_leaks()
    finally:
        job._close()
							
								
								
									
										121
									
								
								tasks/tg_crawler.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										121
									
								
								tasks/tg_crawler.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,121 @@ | |||||||
|  | import logging | ||||||
|  | import os | ||||||
|  | import asyncio | ||||||
|  | import psycopg2 | ||||||
|  | from datetime import datetime | ||||||
|  | from telethon.sync import TelegramClient | ||||||
|  | from telethon.tl.functions.messages import GetHistoryRequest | ||||||
|  | from dotenv import load_dotenv | ||||||
|  | from celery import Celery | ||||||
|  | from utils.logg import LoggerSingleton | ||||||
|  | from celery_app import app | ||||||
|  | 
 | ||||||
|  | load_dotenv() | ||||||
|  | 
 | ||||||
|  | logger = LoggerSingleton.get_logger() | ||||||
|  | 
 | ||||||
class TelegramMonitor:
    """Fetch the newest post of a Telegram channel and store it in Postgres.

    Credentials come from the environment (.env is loaded at module import
    time); the database connection is opened eagerly in __init__.
    """

    def __init__(self):
        self.db_config = {
            'host': os.getenv('DB_HOST'),
            'port': os.getenv('DB_PORT'),
            'database': os.getenv('DB_NAME'),
            'user': os.getenv('DB_USER'),
            'password': os.getenv('DB_PASSWORD')
        }
        self.conn = None
        self._connect()

    def _connect(self):
        """Open the psycopg2 connection; logs and re-raises on failure."""
        try:
            self.conn = psycopg2.connect(**self.db_config)
            logger.info("Database connection established")
        except Exception as e:
            logger.error(f"Database connection error: {e}")
            raise

    def _close(self):
        """Close the connection if it is open; logs instead of raising."""
        if not self.conn or self.conn.closed:
            return
        try:
            self.conn.close()
            logger.info("Database connection closed")
        except Exception as e:
            logger.error(f"Error closing database connection: {e}")

    def _fetch_post(self, channel_username, source_name):
        """Pull the single most recent message from the channel and persist it.

        Re-raises on any failure so the calling task can retry.
        """
        try:
            # NOTE(review): os.getenv returns strings — confirm Telethon
            # accepts a string API_ID here, or cast it to int.
            with TelegramClient('session', os.getenv('API_ID'), os.getenv('API_HASH')) as client:
                entity = client.get_entity(channel_username)
                request = GetHistoryRequest(
                    peer=entity,
                    limit=1,
                    offset_date=None,
                    offset_id=0,
                    max_id=0,
                    min_id=0,
                    add_offset=0,
                    hash=0
                )
                history = client(request)
                if history.messages:
                    self._save_to_db(source_name, history.messages[0].message)
        except Exception as e:
            logger.error(f"Error fetching post from {source_name}: {e}")
            raise

    def _save_to_db(self, source, message):
        """Insert one leak row, reconnecting lazily; rolls back and re-raises on failure."""
        if not self.conn or self.conn.closed:
            self._connect()

        try:
            with self.conn.cursor() as cur:
                cur.execute(
                    "INSERT INTO leaks (resource_name, message) VALUES (%s, %s)",
                    (source, message)
                )
            self.conn.commit()
            logger.info(f"Data from {source} saved successfully")
        except Exception as e:
            logger.error(f"Database save error for {source}: {e}")
            self.conn.rollback()
            raise
|  | 
 | ||||||
@app.task(bind=True, name='tasks.tg_crawler.monitor_channels')
def monitor_channels(self):
    """Fan out one `monitor_channel` task per configured channel.

    NOTE(review): a publish failure aborts the loop and retries this whole
    task, which re-sends tasks for channels already dispatched — confirm
    duplicates are acceptable downstream.
    """
    channels = [
        ('trueosint', 'trueosint'),
        ('dataleak', 'dataleak'),
        ('Vaultofdataleaksss', 'Vaultofdataleaksss')
    ]

    logger.info("Starting Telegram channels monitoring")

    # Broker-publish retry policy (applies to sending, not task execution).
    publish_retry_policy = {
        'max_retries': 3,
        'interval_start': 2,
        'interval_step': 5,
        'interval_max': 20
    }

    for channel, source in channels:
        try:
            monitor_channel.apply_async(
                args=(channel, source),
                queue='telegram',
                retry=True,
                retry_policy=publish_retry_policy
            )
        except Exception as e:
            logger.error(f"Failed to send task for {channel}: {e}")
            raise self.retry(exc=e)
        logger.info(f"Sent task for channel: {channel}")
|  | 
 | ||||||
@app.task(bind=True, name='tasks.tg_crawler.monitor_channel')
def monitor_channel(self, channel, source):
    """Fetch and persist the latest post of one channel; retries on failure.

    The database connection is always closed, even when the fetch fails.
    """
    logger.info(f"Starting monitoring channel: {channel}")
    monitor = TelegramMonitor()
    try:
        monitor._fetch_post(channel, source)
    except Exception as e:
        logger.error(f"Channel monitoring failed for {channel}: {e}")
        raise self.retry(exc=e)
    else:
        logger.info(f"Successfully monitored channel: {channel}")
    finally:
        monitor._close()
| @ -1,77 +0,0 @@ | |||||||
| import logging |  | ||||||
| import os |  | ||||||
| import psycopg2 |  | ||||||
| from datetime import datetime |  | ||||||
| from telethon.sync import TelegramClient |  | ||||||
| from telethon.tl.functions.messages import GetHistoryRequest |  | ||||||
| 
 |  | ||||||
# Log destination; overridable via the LOG_FILE environment variable.
LOG_FILE = os.getenv('LOG_FILE', '/app/tg_nodes.log')

# A directory at this path (e.g. created by a Docker volume mount) would make
# FileHandler fail confusingly later — fail fast with a clear message.
if os.path.exists(LOG_FILE) and os.path.isdir(LOG_FILE):
    raise RuntimeError(f"Path {LOG_FILE} is a directory! Expected file.")

logging.basicConfig(
    # Fixed: without an explicit level the root logger stays at WARNING,
    # silently dropping every logging.info() call in this module.
    level=logging.INFO,
    handlers=[
        logging.FileHandler(LOG_FILE),
        logging.StreamHandler()
    ]
)
| 
 |  | ||||||
class TelegramChannelMonitor:
    """Poll one Telegram channel for its newest post and record it in Postgres.

    Database settings are shared class-wide via set_db_config(); each
    instance monitors a single channel.
    """

    # Shared DB settings; must be set via set_db_config() before saving.
    db_config = None

    def __init__(self, session_name, api_id, api_hash, channel_username, source_name):
        self.session_name = session_name
        self.api_id = api_id
        self.api_hash = api_hash
        self.channel_username = channel_username
        self.source_name = source_name

    @classmethod
    def set_db_config(cls, config):
        """Install the psycopg2 connection settings for all monitors."""
        cls.db_config = config

    def fetch_last_post(self):
        """Fetch the single newest message and save it; logs instead of raising."""
        logging.info(f"[{self.source_name}] checking a new post...")

        try:
            with TelegramClient(self.session_name, self.api_id, self.api_hash) as client:
                entity = client.get_entity(self.channel_username)
                history = client(GetHistoryRequest(
                    peer=entity,
                    limit=1,
                    offset_date=None,
                    offset_id=0,
                    max_id=0,
                    min_id=0,
                    add_offset=0,
                    hash=0
                ))

                if history.messages:
                    msg = history.messages[0]
                    logging.info(f"[{self.source_name}] received a post: {msg.message[:60]}...")
                    self.save_to_db(self.source_name, msg.message)
                else:
                    logging.info(f"[{self.source_name}] there is no new messages")
        except Exception as e:
            logging.error(f"[{self.source_name}] error when receiving a post: {e}")

    def save_to_db(self, source, message):
        """Insert one row into `leaks`; opens a short-lived connection per call.

        Fixed: the connection/cursor were leaked whenever execute/commit
        raised.  `with conn` commits on success and rolls back on error;
        the finally-close guarantees the connection is released either way.
        """
        if not self.db_config:
            logging.error("DB config is not set")
            return

        try:
            conn = psycopg2.connect(**self.db_config)
            try:
                with conn:  # commit on success, rollback on exception
                    with conn.cursor() as cur:
                        cur.execute(
                            "INSERT INTO leaks (source, message) VALUES (%s, %s)",
                            (source, message)
                        )
            finally:
                conn.close()
            logging.info(f"[{self.source_name}] message is recorded in the database")
        except Exception as e:
            logging.error(f"[{self.source_name}] error when writing to the database: {e}")
| @ -1,47 +0,0 @@ | |||||||
| import asyncio |  | ||||||
| import os |  | ||||||
| from apscheduler.schedulers.background import BackgroundScheduler |  | ||||||
| from pytz import timezone |  | ||||||
| from tg_crawler import TelegramChannelMonitor |  | ||||||
| from dotenv import load_dotenv |  | ||||||
| import logging |  | ||||||
| 
 |  | ||||||
| load_dotenv() |  | ||||||
| 
 |  | ||||||
def main():
    """Configure DB access and poll @trueosint daily at 09:00 Moscow time."""
    TelegramChannelMonitor.set_db_config({
        'host': os.getenv("HOST"),
        'port': os.getenv("PORT"),
        'database': os.getenv("DBNAME"),
        'user': os.getenv("USER"),
        'password': os.getenv("PASSWORD")
    })

    monitor = TelegramChannelMonitor(
        session_name='session_trueosint',
        api_id=os.getenv("TELETHON_API_ID"),
        api_hash=os.getenv("TELETHON_API_HASH"),
        channel_username='trueosint',
        source_name='trueosint'
    )

    scheduler = BackgroundScheduler()
    scheduler.add_job(
        monitor.fetch_last_post,
        'cron',
        hour=9,
        minute=0,
        timezone=timezone("Europe/Moscow")
    )

    import time  # local import: only needed for the idle loop below

    try:
        scheduler.start()
        logging.info("Scheduler started successfully")
        # Fixed: the previous `while True: pass` busy-spun a full CPU core;
        # sleeping keeps the process alive at ~zero cost while the
        # background scheduler thread does the work.
        while True:
            time.sleep(1)
    except (KeyboardInterrupt, SystemExit):
        scheduler.shutdown()
        logging.info("Scheduler shut down successfully")


if __name__ == '__main__':
    main()
| @ -1,48 +0,0 @@ | |||||||
| import asyncio |  | ||||||
| import os |  | ||||||
| from apscheduler.schedulers.background import BackgroundScheduler |  | ||||||
| from pytz import timezone |  | ||||||
| from tg_crawler import TelegramChannelMonitor |  | ||||||
| from dotenv import load_dotenv |  | ||||||
| import logging |  | ||||||
| 
 |  | ||||||
| 
 |  | ||||||
| load_dotenv() |  | ||||||
| 
 |  | ||||||
def main():
    """Configure DB access and poll @dataleak daily at 09:00 Moscow time."""
    TelegramChannelMonitor.set_db_config({
        'host': os.getenv("HOST"),
        'port': os.getenv("PORT"),
        'database': os.getenv("DBNAME"),
        'user': os.getenv("USER"),
        'password': os.getenv("PASSWORD")
    })

    # NOTE(review): reuses 'session_trueosint' as the session file for the
    # dataleak channel — confirm whether a distinct session is intended.
    monitor = TelegramChannelMonitor(
        session_name='session_trueosint',
        api_id=os.getenv("TELETHON_API_ID"),
        api_hash=os.getenv("TELETHON_API_HASH"),
        channel_username='dataleak',
        source_name='dataleak'
    )

    scheduler = BackgroundScheduler()
    scheduler.add_job(
        monitor.fetch_last_post,
        'cron',
        hour=9,
        minute=0,
        timezone=timezone("Europe/Moscow")
    )

    import time  # local import: only needed for the idle loop below

    try:
        scheduler.start()
        logging.info("Scheduler started successfully")
        # Fixed: the previous `while True: pass` busy-spun a full CPU core;
        # sleeping keeps the process alive at ~zero cost while the
        # background scheduler thread does the work.
        while True:
            time.sleep(1)
    except (KeyboardInterrupt, SystemExit):
        scheduler.shutdown()
        logging.info("Scheduler shut down successfully")


if __name__ == '__main__':
    main()
							
								
								
									
										27
									
								
								utils/logg.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										27
									
								
								utils/logg.py
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,27 @@ | |||||||
|  | import logging | ||||||
|  | 
 | ||||||
class LoggerSingleton:
    """Process-wide logger holder.

    Builds and configures the logger exactly once (console + file output),
    then hands back the same instance on every subsequent call.
    """

    # Cached logger instance; created lazily on first get_logger() call.
    _logger = None

    @staticmethod
    def get_logger():
        """Return the shared logger, configuring it on first use."""
        if LoggerSingleton._logger is not None:
            return LoggerSingleton._logger

        logger = logging.getLogger(__name__)
        logger.setLevel(logging.INFO)

        fmt = logging.Formatter(
            "%(asctime)s - %(name)s - %(levelname)s - %(message)s",
            datefmt="%Y-%m-%d %H:%M:%S"
        )

        # Mirror every record to the console and to a UTF-8 log file.
        for handler in (
            logging.StreamHandler(),
            logging.FileHandler('tempora.log', encoding='utf-8'),
        ):
            handler.setFormatter(fmt)
            logger.addHandler(handler)

        LoggerSingleton._logger = logger
        return logger
| @ -102,6 +102,7 @@ def get_leaks_stats(): | |||||||
|             conn.close() |             conn.close() | ||||||
| 
 | 
 | ||||||
| @app.route("/") | @app.route("/") | ||||||
|  | @app.route("/index.html") | ||||||
| def index(): | def index(): | ||||||
|     parser_status = get_parser_status() |     parser_status = get_parser_status() | ||||||
|     leaks_stats = get_leaks_stats() |     leaks_stats = get_leaks_stats() | ||||||
| @ -120,6 +121,7 @@ def index(): | |||||||
|         leaks_stats=leaks_stats |         leaks_stats=leaks_stats | ||||||
|     ) |     ) | ||||||
| 
 | 
 | ||||||
|  | @app.route("/logs.html") | ||||||
| @app.route("/logs") | @app.route("/logs") | ||||||
| def logs(): | def logs(): | ||||||
|     log_path = '/app/tg_nodes.log' |     log_path = '/app/tg_nodes.log' | ||||||
|  | |||||||
| @ -99,8 +99,10 @@ | |||||||
|       padding: 20px; |       padding: 20px; | ||||||
|       margin-left: 250px; |       margin-left: 250px; | ||||||
|       transition: margin-left 0.3s ease; |       transition: margin-left 0.3s ease; | ||||||
|       position: relative; |       min-height: 100vh; | ||||||
|       z-index: 1; |       display: flex; | ||||||
|  |       justify-content: center; | ||||||
|  |       align-items: center; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     .container.collapsed { |     .container.collapsed { | ||||||
| @ -116,7 +118,7 @@ | |||||||
|       height: 100%; |       height: 100%; | ||||||
|       object-fit: cover; |       object-fit: cover; | ||||||
|       z-index: -1; |       z-index: -1; | ||||||
|       opacity: 0.8; /* Полупрозрачность видео */ |       opacity: 0.8; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     /* Затемнение поверх видео */ |     /* Затемнение поверх видео */ | ||||||
| @ -126,23 +128,18 @@ | |||||||
|       left: 0; |       left: 0; | ||||||
|       width: 100%; |       width: 100%; | ||||||
|       height: 100%; |       height: 100%; | ||||||
|       background: rgba(0, 0, 0, 0.7); /* Черный полупрозрачный слой */ |       background: rgba(0, 0, 0, 0.7); | ||||||
|       z-index: -1; |       z-index: -1; | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     /* Контейнер для логов */ |     /* Контейнер для логов */ | ||||||
|     .logs-container { |     .logs-container { | ||||||
|       position: absolute; |       width: 90%; | ||||||
|       top: 50%; |       max-width: 800px; | ||||||
|       left: 20px; |       background-color: rgba(46, 46, 46, 0.9); | ||||||
|       transform: translateY(-50%); |  | ||||||
|       width: 40%; |  | ||||||
|       max-width: 600px; |  | ||||||
|       background-color: rgba(46, 46, 46, 0.9); /* Полупрозрачный фон */ |  | ||||||
|       padding: 20px; |       padding: 20px; | ||||||
|       border-radius: 10px; |       border-radius: 10px; | ||||||
|       box-shadow: 0 0 20px rgba(51, 153, 255, 0.5); |       box-shadow: 0 0 20px rgba(51, 153, 255, 0.5); | ||||||
|       z-index: 1; |  | ||||||
|     } |     } | ||||||
| 
 | 
 | ||||||
|     .logs-container h2 { |     .logs-container h2 { | ||||||
|  | |||||||
		Loading…
	
		Reference in New Issue
	
	Block a user