Server : Apache System : Linux indy02.toastserver.com 3.10.0-962.3.2.lve1.5.85.el7.x86_64 #1 SMP Thu Apr 18 15:18:36 UTC 2024 x86_64 User : palandch ( 1163) PHP Version : 7.1.33 Disable Function : NONE Directory : /opt/cloudlinux/venv/lib/python3.11/site-packages/cllvectl/ |
# Module for logging in subprocess. Process related info (call stack) is logged # to simplify investigation of main process hangings # # Copyright © Cloud Linux GmbH & Cloud Linux Software, Inc 2010-2024 All Rights Reserved # # Licensed under CLOUD LINUX LICENSE AGREEMENT # http://cloudlinux.com/docs/LICENSE.TXT import atexit import collections import dataclasses import logging import logging.handlers import os import pathlib import queue import time from datetime import datetime from multiprocessing import Event, Process, Queue from raven.handlers.logging import SentryHandler import psutil from lve_utils.sentry import init_lve_utils_sentry_client # NOTE: set records history size relatively small, # otherwise sentry will trim the log message with history records and process ancestors DEFAULT_RECORDS_HISTORY_SIZE = 10 DEFAULT_LOG_EVENTS_TIMEOUT = 300 DEFAULT_LOG_FORMAT = '%(asctime)s - %(name)s - %(levelname)s - %(message)s' @dataclasses.dataclass class ProcessDescription: """ Process description object pid: int - process id ppid: int | None - parent process id user: str | None - process owner status: str | None - process status start_time: datetime | None - process start time cmdline: str | None - process command line """ pid: int ppid: int | None user: str | None status: str | None start_time: datetime | None cmdline: str | None @classmethod def from_psutil(cls, process: psutil.Process): """Build process description object using psutil.Process object""" try: ppid = process.ppid() except Exception: ppid = None try: cmdline = ' '.join(process.cmdline()) except Exception: cmdline = None try: start_time = datetime.fromtimestamp(process.create_time()) except Exception: start_time = None try: user = process.username() except Exception: user = None try: status = process.status() except Exception: status = None return cls(process.pid, ppid, user, status, start_time, cmdline) def to_str(self): return f"PID: {self.pid}, PPID: {self.ppid}, User: {self.user}, Status: {self.status} " \ f"Start time: {self.start_time} Command: {self.cmdline}" class QueueListenerWithHistory(logging.handlers.QueueListener): def __init__( self, queue, *handlers, timeout: int = DEFAULT_LOG_EVENTS_TIMEOUT, records_history_size: int = DEFAULT_RECORDS_HISTORY_SIZE, ): """Queue listener with log records history :param multiprocessing.Queue queue: log records queue :param int timeout: log records flow timeout, defaults to DEFAULT_LOG_EVENTS_TIMEOUT In case timeput is reached, last log records and process ancestors are logged :param int records_history_size: the max number of records to log in case of timeout """ super().__init__(queue, *handlers, respect_handler_level=True) self._records_history_queue: collections.deque[logging.LogRecord] = collections.deque( iterable=[], maxlen=records_history_size ) self._timeout = timeout self._timout_registred = False def dequeue(self, _block): """Dequeue log event""" while True: try: record = self.queue.get(timeout=self._timeout) self._records_history_queue.append(record) break except queue.Empty: self._handle_queue_timeout() return record def _monitor(self): """Main loop to process log events""" self._handle_monitor_started() super()._monitor() self._handle_monitor_finished() def _handle_monitor_started(self): """Start monitoring log events processing time""" self._start_ts = time.time() # pylint: disable=attribute-defined-outside-init def _handle_monitor_finished(self): """Log that monitor finished processing log records""" if self._timout_registred is True: r = self._create_record( logging.ERROR, 'Processing log queue took %.4f seconds. Pid: %s', time.time() - self._start_ts, os.getpid(), ) self.handle(r) def _get_ancestors(self): """Collect process ancestors up to pid 1""" ancestors: list[ProcessDescription] = [] try: process = psutil.Process() ancestors = [ProcessDescription.from_psutil(process)] while (ppid := process.ppid()) != 0: process = psutil.Process(ppid) ancestors.append(ProcessDescription.from_psutil(process)) return ancestors except Exception: return ancestors def _create_record(self, level, msg, *args): """Create log record""" return logging.LogRecord( name=__name__, level=level, pathname='', lineno=1, msg=msg, args=args, exc_info=None, ) def _handle_queue_timeout(self): """ Handle log queue timeout event. Print last events (incl. omitted because debug disabled) and process ancestors to log. """ if self._timout_registred is False: proc_ancestors = '\n'.join( proc_descr.to_str() for proc_descr in self._get_ancestors()[::-1] ) last_events = '\n'.join( f'[{record.levelname}] - {datetime.fromtimestamp(record.created)}: {record.message}' for record in self._records_history_queue ) error_msg = 'Log queue timeout. Last events:\n%s\nProcess ancestors:\n%s' error_args = (last_events, proc_ancestors) r = self._create_record(logging.ERROR, error_msg, *error_args) self.handle(r) self._timout_registred = True def get_log_level(file_name: str): """Get log level based on debug flag presence""" return logging.DEBUG if pathlib.Path(f'{file_name}.debug.flag').exists() else logging.INFO def run_subpprocess_logger(q: Queue, stop_event: Event, file_name: str, timeout: int): """ Main function to process log events in subprocess :param Queue q: log records queue :param Event stop_event: stop event :param str file_name: log file name """ sentry_handler = SentryHandler(init_lve_utils_sentry_client('lvectl'), level=logging.ERROR) fh = logging.FileHandler(file_name) fh.setFormatter(logging.Formatter(DEFAULT_LOG_FORMAT)) fh.setLevel(get_log_level(file_name)) q_handler = QueueListenerWithHistory(q, fh, sentry_handler, timeout=timeout) q_handler.start() stop_event.wait() q_handler.stop() @dataclasses.dataclass class SubprocessLogger: logger_process: Process stop_event: Event subprocess_loggers: dict[str | None, SubprocessLogger] = {} def init_subprocess_logger( logger_name: str | None, file_name: str, timeout: int = DEFAULT_LOG_EVENTS_TIMEOUT, ) -> logging.Logger: """ Get asynchronous logger running in subprocess Log records asynchronously in subprocess (>=info by default, >= debug when debug flag is present). Log records from debug and higher in subprocess in case of timeout. WARNING: multiprocessing.Queue is used to pass log records. It works asynchornously (passing to subprocess in handled by separate thread). This may lead to log records not being passed to the subprocess in case main process got stuck e.g. in kmodlve kernel space. :param str | None logger_name: logger name :param str file_name: log file :param int timeout: log events processing timeout """ global subprocess_loggers subprocess_logger = subprocess_loggers.get(logger_name) if subprocess_logger is not None: return logging.getLogger(logger_name) logger = logging.getLogger(logger_name) # NOTE: We want to pass all the records (incl. debug) to the subprocess # so that we can log them in case of timeout logger.setLevel(logging.DEBUG) subprocess_logger_queue: Queue = Queue() queue_handler = logging.handlers.QueueHandler(subprocess_logger_queue) queue_handler.setLevel(logging.DEBUG) logger.addHandler(queue_handler) stop_event = Event() logger_process = Process(target=run_subpprocess_logger, args=(subprocess_logger_queue, stop_event, file_name, timeout)) logger_process.start() atexit.register(stop_event.set) subprocess_loggers[logger_name] = SubprocessLogger(logger_process, stop_event) return logger def stop_subprocess_logger(name: str): """ Stop subprocess logger """ global subprocess_loggers subprocess_logger = subprocess_loggers.get(name) if subprocess_logger is None: return subprocess_logger.stop_event.set() subprocess_logger.logger_process.join() del subprocess_loggers[name]