""" Docker Log Monitor for N8N This module provides functionality to monitor N8N Docker container logs in real-time to catch errors and analyze workflow execution. """ import subprocess import threading import json import re import logging from datetime import datetime from typing import Dict, List, Any, Optional, Callable from pathlib import Path import time logger = logging.getLogger(__name__) class DockerLogMonitor: """Monitor N8N Docker container logs for error detection and analysis.""" def __init__(self, container_name: str = "n8n", compose_file: Optional[str] = None): """ Initialize the Docker log monitor. Args: container_name: Name of the N8N Docker container compose_file: Path to docker-compose file (optional) """ self.container_name = container_name self.compose_file = compose_file self.is_monitoring = False self.monitor_thread = None self.log_callbacks = [] self.error_patterns = [ r"ERROR", r"FATAL", r"Exception", r"Error:", r"Failed", r"Workflow execution.*failed", r"Node.*failed", r"Timeout", r"Connection.*failed", r"Authentication.*failed" ] self.log_buffer = [] self.max_buffer_size = 1000 def add_log_callback(self, callback: Callable[[Dict[str, Any]], None]): """ Add a callback function to be called when new logs are received. Args: callback: Function to call with log entry data """ self.log_callbacks.append(callback) def start_monitoring(self, tail_lines: int = 100, follow: bool = True) -> bool: """ Start monitoring Docker logs. Args: tail_lines: Number of existing log lines to retrieve follow: Whether to follow new logs in real-time Returns: True if monitoring started successfully """ if self.is_monitoring: logger.warning("Log monitoring is already running") return False try: self.is_monitoring = True self.monitor_thread = threading.Thread( target=self._monitor_logs, args=(tail_lines, follow), daemon=True ) self.monitor_thread.start() logger.info(f"Started monitoring logs for container: {self.container_name}") return True except Exception as e: logger.error(f"Failed to start log monitoring: {e}") self.is_monitoring = False return False def stop_monitoring(self): """Stop monitoring Docker logs.""" self.is_monitoring = False if self.monitor_thread and self.monitor_thread.is_alive(): self.monitor_thread.join(timeout=5) logger.info("Stopped log monitoring") def _monitor_logs(self, tail_lines: int, follow: bool): """Internal method to monitor Docker logs.""" try: # Build docker logs command if self.compose_file: cmd = [ "docker-compose", "-f", self.compose_file, "logs", "--tail", str(tail_lines) ] if follow: cmd.append("-f") cmd.append(self.container_name) else: cmd = [ "docker", "logs", "--tail", str(tail_lines) ] if follow: cmd.append("-f") cmd.append(self.container_name) # Start subprocess process = subprocess.Popen( cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, universal_newlines=True, bufsize=1 ) # Read logs line by line while self.is_monitoring and process.poll() is None: line = process.stdout.readline() if line: self._process_log_line(line.strip()) # Process any remaining output if process.poll() is not None: remaining_output = process.stdout.read() if remaining_output: for line in remaining_output.split('\n'): if line.strip(): self._process_log_line(line.strip()) except Exception as e: logger.error(f"Error in log monitoring: {e}") finally: self.is_monitoring = False def _process_log_line(self, line: str): """Process a single log line.""" try: # Parse timestamp and message log_entry = self._parse_log_line(line) # Add to buffer self.log_buffer.append(log_entry) 
            if len(self.log_buffer) > self.max_buffer_size:
                self.log_buffer.pop(0)

            # Check for errors
            if self._is_error_line(line):
                log_entry['is_error'] = True
                logger.warning(f"Error detected in N8N logs: {line}")

            # Call registered callbacks
            for callback in self.log_callbacks:
                try:
                    callback(log_entry)
                except Exception as e:
                    logger.error(f"Error in log callback: {e}")

        except Exception as e:
            logger.error(f"Error processing log line: {e}")

    def _parse_log_line(self, line: str) -> Dict[str, Any]:
        """Parse a log line into structured data."""
        log_entry = {
            'timestamp': datetime.now().isoformat(),
            'raw_line': line,
            'message': line,
            'level': 'INFO',
            'is_error': False,
            'workflow_id': None,
            'execution_id': None,
            'node_name': None
        }

        # Try to extract timestamp from log line
        timestamp_match = re.search(r'(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2})', line)
        if timestamp_match:
            log_entry['timestamp'] = timestamp_match.group(1)

        # Extract log level
        level_match = re.search(r'\b(DEBUG|INFO|WARN|ERROR|FATAL)\b', line, re.IGNORECASE)
        if level_match:
            log_entry['level'] = level_match.group(1).upper()

        # Try to extract workflow ID
        workflow_id_match = re.search(r'workflow[_\s]*id[:\s]*([a-zA-Z0-9-]+)', line, re.IGNORECASE)
        if workflow_id_match:
            log_entry['workflow_id'] = workflow_id_match.group(1)

        # Try to extract execution ID
        execution_id_match = re.search(r'execution[_\s]*id[:\s]*([a-zA-Z0-9-]+)', line, re.IGNORECASE)
        if execution_id_match:
            log_entry['execution_id'] = execution_id_match.group(1)

        # Try to extract node name (optionally quoted)
        node_match = re.search(r'node[:\s]*(["\']?)([^"\'\s]+)\1', line, re.IGNORECASE)
        if node_match:
            log_entry['node_name'] = node_match.group(2)

        return log_entry

    def _is_error_line(self, line: str) -> bool:
        """Check if a log line contains an error."""
        for pattern in self.error_patterns:
            if re.search(pattern, line, re.IGNORECASE):
                return True
        return False

    def get_recent_logs(self, count: int = 50, error_only: bool = False) -> List[Dict[str, Any]]:
        """
        Get recent logs from buffer.

        Args:
            count: Number of log entries to return
            error_only: If True, return only error logs

        Returns:
            List of log entries
        """
        logs = self.log_buffer[-count:] if count > 0 else self.log_buffer

        if error_only:
            logs = [log for log in logs if log.get('is_error', False)]

        return logs

    def get_logs_for_workflow(self, workflow_id: str, since_minutes: int = 60) -> List[Dict[str, Any]]:
        """
        Get logs for a specific workflow.

        Args:
            workflow_id: ID of the workflow
            since_minutes: Look for logs in the last N minutes

        Returns:
            List of log entries for the workflow
        """
        cutoff_time = datetime.now().timestamp() - (since_minutes * 60)
        workflow_logs = []

        for log in self.log_buffer:
            if log.get('workflow_id') == workflow_id:
                try:
                    log_time = datetime.fromisoformat(log['timestamp'].replace('Z', '+00:00')).timestamp()
                    if log_time >= cutoff_time:
                        workflow_logs.append(log)
                except Exception:
                    # If timestamp parsing fails, include the log anyway
                    workflow_logs.append(log)

        return workflow_logs

    def get_error_summary(self, since_minutes: int = 60) -> Dict[str, Any]:
        """
        Get a summary of errors in the specified time period.
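
        The summary counts matches of the configured error patterns and
        aggregates errors per workflow and per node.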

        Args:
            since_minutes: Look for errors in the last N minutes

        Returns:
            Error summary with counts and patterns
        """
        cutoff_time = datetime.now().timestamp() - (since_minutes * 60)
        errors = []

        for log in self.log_buffer:
            if log.get('is_error', False):
                try:
                    log_time = datetime.fromisoformat(log['timestamp'].replace('Z', '+00:00')).timestamp()
                    if log_time >= cutoff_time:
                        errors.append(log)
                except Exception:
                    # If timestamp parsing fails, include the error anyway
                    errors.append(log)

        # Analyze error patterns
        error_patterns = {}
        workflow_errors = {}
        node_errors = {}

        for error in errors:
            # Count by message pattern
            message = error.get('message', '')
            for pattern in self.error_patterns:
                if re.search(pattern, message, re.IGNORECASE):
                    error_patterns[pattern] = error_patterns.get(pattern, 0) + 1

            # Count by workflow
            workflow_id = error.get('workflow_id')
            if workflow_id:
                workflow_errors[workflow_id] = workflow_errors.get(workflow_id, 0) + 1

            # Count by node
            node_name = error.get('node_name')
            if node_name:
                node_errors[node_name] = node_errors.get(node_name, 0) + 1

        return {
            'total_errors': len(errors),
            'time_period_minutes': since_minutes,
            'error_patterns': error_patterns,
            'workflow_errors': workflow_errors,
            'node_errors': node_errors,
            'recent_errors': errors[-10:] if errors else []  # Last 10 errors
        }

    def save_logs_to_file(self, filepath: str, error_only: bool = False,
                          workflow_id: Optional[str] = None) -> str:
        """
        Save logs to a file.

        Args:
            filepath: Path to save the logs
            error_only: If True, save only error logs
            workflow_id: If specified, save only logs for this workflow

        Returns:
            Path to the saved file
        """
        logs_to_save = self.log_buffer.copy()

        # Filter by workflow if specified
        if workflow_id:
            logs_to_save = [log for log in logs_to_save if log.get('workflow_id') == workflow_id]

        # Filter by error status if specified
        if error_only:
            logs_to_save = [log for log in logs_to_save if log.get('is_error', False)]

        # Create directory if it doesn't exist
        Path(filepath).parent.mkdir(parents=True, exist_ok=True)

        # Save logs as one JSON object per line
        with open(filepath, 'w') as f:
            for log in logs_to_save:
                f.write(f"{json.dumps(log)}\n")

        logger.info(f"Saved {len(logs_to_save)} log entries to: {filepath}")
        return filepath

    def is_container_running(self) -> bool:
        """
        Check if the N8N container is running.

        Returns:
            True if container is running
        """
        try:
            if self.compose_file:
                result = subprocess.run(
                    ["docker-compose", "-f", self.compose_file, "ps", "-q", self.container_name],
                    capture_output=True, text=True, timeout=10
                )
            else:
                result = subprocess.run(
                    ["docker", "ps", "-q", "-f", f"name={self.container_name}"],
                    capture_output=True, text=True, timeout=10
                )
            return bool(result.stdout.strip())
        except Exception as e:
            logger.error(f"Error checking container status: {e}")
            return False

    def get_container_info(self) -> Dict[str, Any]:
        """
        Get information about the N8N container.
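
        Uses docker inspect, or docker-compose ps when a compose file was
        configured, to look up the container state.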

        Returns:
            Container information
        """
        try:
            if self.compose_file:
                result = subprocess.run(
                    ["docker-compose", "-f", self.compose_file, "ps", self.container_name],
                    capture_output=True, text=True, timeout=10
                )
            else:
                result = subprocess.run(
                    ["docker", "inspect", self.container_name],
                    capture_output=True, text=True, timeout=10
                )

                # Parse JSON output for docker inspect
                if result.returncode == 0:
                    container_data = json.loads(result.stdout)[0]
                    return {
                        'name': self.container_name,
                        'status': container_data['State']['Status'],
                        'running': container_data['State']['Running'],
                        'started_at': container_data['State']['StartedAt'],
                        'image': container_data['Config']['Image'],
                        'ports': container_data['NetworkSettings']['Ports']
                    }

            return {
                'name': self.container_name,
                'status': 'unknown',
                'info': result.stdout if result.returncode == 0 else result.stderr
            }

        except Exception as e:
            logger.error(f"Error getting container info: {e}")
            return {
                'name': self.container_name,
                'status': 'error',
                'error': str(e)
            }


def create_docker_log_monitor(container_name: str = "n8n", compose_file: Optional[str] = None):
    """Create a Docker log monitor instance."""
    return DockerLogMonitor(container_name, compose_file)
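

# Example usage (a minimal sketch): registers a callback that prints error
# lines, follows the container logs for a short period, then dumps an error
# summary. The container name "n8n" and the 30-second wait are illustrative
# assumptions, not values required by this module.
if __name__ == "__main__":
    logging.basicConfig(level=logging.INFO)

    monitor = create_docker_log_monitor(container_name="n8n")

    if not monitor.is_container_running():
        logger.error("N8N container is not running")
    else:
        # Print every error entry as soon as it is detected
        def print_error(entry: Dict[str, Any]) -> None:
            if entry.get('is_error'):
                print(f"[{entry['timestamp']}] {entry['message']}")

        monitor.add_log_callback(print_error)
        monitor.start_monitoring(tail_lines=200, follow=True)

        try:
            # Let the monitor collect logs for a while (illustrative duration)
            time.sleep(30)
        finally:
            monitor.stop_monitoring()

        # Summarize errors seen in the last hour
        summary = monitor.get_error_summary(since_minutes=60)
        print(json.dumps(summary, indent=2, default=str))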