#!/usr/bin/env python3 """ Execution Monitor - Tools for monitoring N8N workflow executions Provides real-time monitoring, alerting, and execution management """ import time import json from typing import Dict, List, Optional, Callable, Any from datetime import datetime, timedelta from dataclasses import dataclass from enum import Enum from threading import Thread, Event import logging class ExecutionStatus(Enum): """Execution status enumeration""" RUNNING = "running" SUCCESS = "success" ERROR = "error" CANCELLED = "cancelled" WAITING = "waiting" @dataclass class ExecutionEvent: """Represents an execution event""" execution_id: str workflow_id: str status: ExecutionStatus timestamp: datetime duration: Optional[float] = None error_message: Optional[str] = None node_data: Optional[Dict] = None class ExecutionMonitor: """Monitors N8N workflow executions and provides real-time insights""" def __init__(self, n8n_client, poll_interval: int = 5): """Initialize execution monitor""" self.client = n8n_client self.poll_interval = poll_interval self.monitoring = False self.stop_event = Event() self.callbacks = { 'on_success': [], 'on_error': [], 'on_start': [], 'on_complete': [] } self.tracked_executions = {} self.logger = self._setup_logger() def _setup_logger(self) -> logging.Logger: """Setup logging for the monitor""" logger = logging.getLogger('N8NExecutionMonitor') logger.setLevel(logging.INFO) if not logger.handlers: handler = logging.StreamHandler() formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') handler.setFormatter(formatter) logger.addHandler(handler) return logger def add_callback(self, event_type: str, callback: Callable[[ExecutionEvent], None]): """Add callback for execution events""" if event_type in self.callbacks: self.callbacks[event_type].append(callback) else: raise ValueError(f"Invalid event type: {event_type}") def start_monitoring(self, workflow_ids: Optional[List[str]] = None): """Start monitoring workflow executions""" if self.monitoring: self.logger.warning("Monitoring already running") return self.monitoring = True self.stop_event.clear() monitor_thread = Thread(target=self._monitor_loop, args=(workflow_ids,)) monitor_thread.daemon = True monitor_thread.start() self.logger.info("Execution monitoring started") def stop_monitoring(self): """Stop monitoring workflow executions""" if not self.monitoring: return self.monitoring = False self.stop_event.set() self.logger.info("Execution monitoring stopped") def _monitor_loop(self, workflow_ids: Optional[List[str]]): """Main monitoring loop""" while self.monitoring and not self.stop_event.is_set(): try: self._check_executions(workflow_ids) time.sleep(self.poll_interval) except Exception as e: self.logger.error(f"Error in monitoring loop: {e}") time.sleep(self.poll_interval) def _check_executions(self, workflow_ids: Optional[List[str]]): """Check for new or updated executions""" try: # Get recent executions executions = self.client.get_executions(limit=50) for execution in executions: execution_id = execution.get('id') workflow_id = execution.get('workflowId') # Filter by workflow IDs if specified if workflow_ids and workflow_id not in workflow_ids: continue # Check if this is a new or updated execution if self._is_execution_updated(execution): event = self._create_execution_event(execution) self._handle_execution_event(event) # Update tracked executions self.tracked_executions[execution_id] = { 'status': execution.get('status'), 'last_updated': datetime.now() } except Exception as e: self.logger.error(f"Error checking executions: {e}") def _is_execution_updated(self, execution: Dict) -> bool: """Check if execution is new or has been updated""" execution_id = execution.get('id') current_status = execution.get('status') if execution_id not in self.tracked_executions: return True tracked_status = self.tracked_executions[execution_id]['status'] return current_status != tracked_status def _create_execution_event(self, execution: Dict) -> ExecutionEvent: """Create ExecutionEvent from execution data""" execution_id = execution.get('id') workflow_id = execution.get('workflowId') status = ExecutionStatus(execution.get('status')) # Calculate duration if available duration = None start_time = execution.get('startedAt') finish_time = execution.get('finishedAt') if start_time and finish_time: start_dt = datetime.fromisoformat(start_time.replace('Z', '+00:00')) finish_dt = datetime.fromisoformat(finish_time.replace('Z', '+00:00')) duration = (finish_dt - start_dt).total_seconds() # Extract error message if available error_message = None if status == ExecutionStatus.ERROR: data = execution.get('data', {}) if 'resultData' in data and 'error' in data['resultData']: error_message = data['resultData']['error'].get('message') return ExecutionEvent( execution_id=execution_id, workflow_id=workflow_id, status=status, timestamp=datetime.now(), duration=duration, error_message=error_message, node_data=execution.get('data') ) def _handle_execution_event(self, event: ExecutionEvent): """Handle execution event and trigger callbacks""" self.logger.info(f"Execution {event.execution_id} status: {event.status.value}") # Trigger appropriate callbacks if event.status == ExecutionStatus.SUCCESS: for callback in self.callbacks['on_success']: try: callback(event) except Exception as e: self.logger.error(f"Error in success callback: {e}") elif event.status == ExecutionStatus.ERROR: for callback in self.callbacks['on_error']: try: callback(event) except Exception as e: self.logger.error(f"Error in error callback: {e}") elif event.status == ExecutionStatus.RUNNING: for callback in self.callbacks['on_start']: try: callback(event) except Exception as e: self.logger.error(f"Error in start callback: {e}") # Always trigger complete callbacks for finished executions if event.status in [ExecutionStatus.SUCCESS, ExecutionStatus.ERROR, ExecutionStatus.CANCELLED]: for callback in self.callbacks['on_complete']: try: callback(event) except Exception as e: self.logger.error(f"Error in complete callback: {e}") def execute_and_monitor(self, workflow_id: str, test_data: Optional[Dict] = None, timeout: int = 300) -> ExecutionEvent: """Execute workflow and monitor until completion""" try: # Start execution result = self.client.execute_workflow(workflow_id, test_data) execution_id = result.get('id') if not execution_id: raise Exception("Failed to get execution ID from workflow execution") self.logger.info(f"Started execution {execution_id} for workflow {workflow_id}") # Monitor execution until completion start_time = time.time() while time.time() - start_time < timeout: execution = self.client.get_execution(execution_id) status = execution.get('status') if status in ['success', 'error', 'cancelled']: event = self._create_execution_event(execution) self.logger.info(f"Execution {execution_id} completed with status: {status}") return event time.sleep(2) # Check every 2 seconds # Timeout reached raise TimeoutError(f"Execution {execution_id} did not complete within {timeout} seconds") except Exception as e: self.logger.error(f"Error executing and monitoring workflow: {e}") raise def get_execution_summary(self, hours: int = 24) -> Dict: """Get execution summary for the specified time period""" try: # Get executions from the specified time period executions = self.client.get_executions(limit=200) # Filter by time period cutoff_time = datetime.now() - timedelta(hours=hours) recent_executions = [] for execution in executions: started_at = execution.get('startedAt') if started_at: exec_time = datetime.fromisoformat(started_at.replace('Z', '+00:00')) if exec_time > cutoff_time: recent_executions.append(execution) # Calculate summary statistics total = len(recent_executions) successful = len([e for e in recent_executions if e.get('status') == 'success']) failed = len([e for e in recent_executions if e.get('status') == 'error']) running = len([e for e in recent_executions if e.get('status') == 'running']) # Calculate average duration durations = [] for execution in recent_executions: start_time = execution.get('startedAt') finish_time = execution.get('finishedAt') if start_time and finish_time: start_dt = datetime.fromisoformat(start_time.replace('Z', '+00:00')) finish_dt = datetime.fromisoformat(finish_time.replace('Z', '+00:00')) durations.append((finish_dt - start_dt).total_seconds()) avg_duration = sum(durations) / len(durations) if durations else 0 # Group by workflow workflow_stats = {} for execution in recent_executions: workflow_id = execution.get('workflowId') if workflow_id not in workflow_stats: workflow_stats[workflow_id] = {'total': 0, 'success': 0, 'error': 0} workflow_stats[workflow_id]['total'] += 1 status = execution.get('status') if status == 'success': workflow_stats[workflow_id]['success'] += 1 elif status == 'error': workflow_stats[workflow_id]['error'] += 1 return { 'time_period_hours': hours, 'total_executions': total, 'successful_executions': successful, 'failed_executions': failed, 'running_executions': running, 'success_rate': (successful / total * 100) if total > 0 else 0, 'average_duration_seconds': avg_duration, 'workflow_statistics': workflow_stats, 'executions': recent_executions } except Exception as e: self.logger.error(f"Error generating execution summary: {e}") raise def wait_for_execution_completion(self, execution_id: str, timeout: int = 300) -> ExecutionEvent: """Wait for a specific execution to complete""" start_time = time.time() while time.time() - start_time < timeout: execution = self.client.get_execution(execution_id) status = execution.get('status') if status in ['success', 'error', 'cancelled']: return self._create_execution_event(execution) time.sleep(2) raise TimeoutError(f"Execution {execution_id} did not complete within {timeout} seconds") def cancel_execution(self, execution_id: str) -> bool: """Cancel a running execution""" try: # N8N API might have a cancel endpoint - this would need to be implemented # based on the actual API capabilities self.logger.warning(f"Cancel execution {execution_id} - implement based on N8N API") return True except Exception as e: self.logger.error(f"Error cancelling execution {execution_id}: {e}") return False def get_execution_logs(self, execution_id: str) -> Dict: """Get detailed logs for an execution""" try: execution = self.client.get_execution(execution_id) logs = { 'execution_id': execution_id, 'status': execution.get('status'), 'workflow_id': execution.get('workflowId'), 'started_at': execution.get('startedAt'), 'finished_at': execution.get('finishedAt'), 'node_logs': [], 'errors': [] } # Extract node-level logs if available data = execution.get('data', {}) if 'resultData' in data: result_data = data['resultData'] # Extract errors if 'error' in result_data: logs['errors'].append(result_data['error']) # Extract node execution data if 'runData' in result_data: for node_name, node_runs in result_data['runData'].items(): for run in node_runs: logs['node_logs'].append({ 'node': node_name, 'start_time': run.get('startTime'), 'execution_time': run.get('executionTime'), 'data': run.get('data', {}), 'error': run.get('error') }) return logs except Exception as e: self.logger.error(f"Error getting execution logs: {e}") raise # Utility functions for common monitoring scenarios def create_simple_monitor(n8n_client) -> ExecutionMonitor: """Create a simple execution monitor with basic logging""" monitor = ExecutionMonitor(n8n_client) def log_success(event: ExecutionEvent): print(f"✅ Execution {event.execution_id} completed successfully in {event.duration:.2f}s") def log_error(event: ExecutionEvent): print(f"❌ Execution {event.execution_id} failed: {event.error_message}") def log_start(event: ExecutionEvent): print(f"🚀 Execution {event.execution_id} started") monitor.add_callback('on_success', log_success) monitor.add_callback('on_error', log_error) monitor.add_callback('on_start', log_start) return monitor if __name__ == "__main__": print("Execution Monitor initialized successfully.")