- Added comprehensive N8N development tools collection - Added Docker-containerized mock API server for testing - Added complete documentation and setup guides - Added mock API server with health checks and data endpoints - Tools include workflow analyzers, debuggers, and controllers 🤖 Generated with [Claude Code](https://claude.ai/code) Co-Authored-By: Claude <noreply@anthropic.com>
405 lines
16 KiB
Python
405 lines
16 KiB
Python
#!/usr/bin/env python3
|
|
"""
|
|
Execution Monitor - Tools for monitoring N8N workflow executions
|
|
Provides real-time monitoring, alerting, and execution management
|
|
"""
|
|
|
|
import json
import logging
import time
from dataclasses import dataclass
from datetime import datetime, timedelta, timezone
from enum import Enum
from threading import Thread, Event
from typing import Dict, List, Optional, Callable, Any
|
|
|
|
|
class ExecutionStatus(Enum):
    """Execution status enumeration.

    Member values mirror the raw status strings returned by the N8N
    executions API, so ``ExecutionStatus(raw_status)`` maps an API payload
    straight to a member (see ``_create_execution_event``).
    """
    RUNNING = "running"      # execution currently in progress
    SUCCESS = "success"      # finished without errors
    ERROR = "error"          # finished with an error
    CANCELLED = "cancelled"  # stopped before completion
    WAITING = "waiting"      # paused/waiting (exact semantics defined by N8N)
|
@dataclass
class ExecutionEvent:
    """Represents an execution event.

    A snapshot of a single execution's state at the moment the monitor
    observed it; instances are passed to registered callbacks.
    """
    execution_id: str  # N8N execution id
    workflow_id: str  # id of the workflow that ran
    status: ExecutionStatus  # status at observation time
    timestamp: datetime  # when the monitor created this event (local time)
    duration: Optional[float] = None  # seconds; only set when start AND finish known
    error_message: Optional[str] = None  # populated for ERROR status when available
    node_data: Optional[Dict] = None  # raw 'data' payload from the API, if any
|
class ExecutionMonitor:
    """Monitors N8N workflow executions and provides real-time insights"""

    def __init__(self, n8n_client, poll_interval: int = 5):
        """Initialize execution monitor.

        Args:
            n8n_client: client exposing ``get_executions()``,
                ``get_execution()`` and ``execute_workflow()`` (interface
                inferred from usage below — confirm against the client class).
            poll_interval: seconds between polls of the executions API.
        """
        self.client = n8n_client
        self.poll_interval = poll_interval
        # True while the background monitor loop should keep running.
        self.monitoring = False
        # Signalled to ask the monitor thread to stop.
        self.stop_event = Event()
        # Event-type name -> list of callables taking an ExecutionEvent.
        self.callbacks = {
            'on_success': [],
            'on_error': [],
            'on_start': [],
            'on_complete': []
        }
        # execution_id -> {'status', 'last_updated'}; used to detect new or
        # changed executions between polls. Note: grows without bound.
        self.tracked_executions = {}
        self.logger = self._setup_logger()
|
def _setup_logger(self) -> logging.Logger:
|
|
"""Setup logging for the monitor"""
|
|
logger = logging.getLogger('N8NExecutionMonitor')
|
|
logger.setLevel(logging.INFO)
|
|
|
|
if not logger.handlers:
|
|
handler = logging.StreamHandler()
|
|
formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
|
|
handler.setFormatter(formatter)
|
|
logger.addHandler(handler)
|
|
|
|
return logger
|
|
|
|
def add_callback(self, event_type: str, callback: Callable[[ExecutionEvent], None]):
|
|
"""Add callback for execution events"""
|
|
if event_type in self.callbacks:
|
|
self.callbacks[event_type].append(callback)
|
|
else:
|
|
raise ValueError(f"Invalid event type: {event_type}")
|
|
|
|
def start_monitoring(self, workflow_ids: Optional[List[str]] = None):
|
|
"""Start monitoring workflow executions"""
|
|
if self.monitoring:
|
|
self.logger.warning("Monitoring already running")
|
|
return
|
|
|
|
self.monitoring = True
|
|
self.stop_event.clear()
|
|
|
|
monitor_thread = Thread(target=self._monitor_loop, args=(workflow_ids,))
|
|
monitor_thread.daemon = True
|
|
monitor_thread.start()
|
|
|
|
self.logger.info("Execution monitoring started")
|
|
|
|
def stop_monitoring(self):
|
|
"""Stop monitoring workflow executions"""
|
|
if not self.monitoring:
|
|
return
|
|
|
|
self.monitoring = False
|
|
self.stop_event.set()
|
|
self.logger.info("Execution monitoring stopped")
|
|
|
|
def _monitor_loop(self, workflow_ids: Optional[List[str]]):
|
|
"""Main monitoring loop"""
|
|
while self.monitoring and not self.stop_event.is_set():
|
|
try:
|
|
self._check_executions(workflow_ids)
|
|
time.sleep(self.poll_interval)
|
|
except Exception as e:
|
|
self.logger.error(f"Error in monitoring loop: {e}")
|
|
time.sleep(self.poll_interval)
|
|
|
|
def _check_executions(self, workflow_ids: Optional[List[str]]):
|
|
"""Check for new or updated executions"""
|
|
try:
|
|
# Get recent executions
|
|
executions = self.client.get_executions(limit=50)
|
|
|
|
for execution in executions:
|
|
execution_id = execution.get('id')
|
|
workflow_id = execution.get('workflowId')
|
|
|
|
# Filter by workflow IDs if specified
|
|
if workflow_ids and workflow_id not in workflow_ids:
|
|
continue
|
|
|
|
# Check if this is a new or updated execution
|
|
if self._is_execution_updated(execution):
|
|
event = self._create_execution_event(execution)
|
|
self._handle_execution_event(event)
|
|
|
|
# Update tracked executions
|
|
self.tracked_executions[execution_id] = {
|
|
'status': execution.get('status'),
|
|
'last_updated': datetime.now()
|
|
}
|
|
|
|
except Exception as e:
|
|
self.logger.error(f"Error checking executions: {e}")
|
|
|
|
def _is_execution_updated(self, execution: Dict) -> bool:
|
|
"""Check if execution is new or has been updated"""
|
|
execution_id = execution.get('id')
|
|
current_status = execution.get('status')
|
|
|
|
if execution_id not in self.tracked_executions:
|
|
return True
|
|
|
|
tracked_status = self.tracked_executions[execution_id]['status']
|
|
return current_status != tracked_status
|
|
|
|
def _create_execution_event(self, execution: Dict) -> ExecutionEvent:
|
|
"""Create ExecutionEvent from execution data"""
|
|
execution_id = execution.get('id')
|
|
workflow_id = execution.get('workflowId')
|
|
status = ExecutionStatus(execution.get('status'))
|
|
|
|
# Calculate duration if available
|
|
duration = None
|
|
start_time = execution.get('startedAt')
|
|
finish_time = execution.get('finishedAt')
|
|
|
|
if start_time and finish_time:
|
|
start_dt = datetime.fromisoformat(start_time.replace('Z', '+00:00'))
|
|
finish_dt = datetime.fromisoformat(finish_time.replace('Z', '+00:00'))
|
|
duration = (finish_dt - start_dt).total_seconds()
|
|
|
|
# Extract error message if available
|
|
error_message = None
|
|
if status == ExecutionStatus.ERROR:
|
|
data = execution.get('data', {})
|
|
if 'resultData' in data and 'error' in data['resultData']:
|
|
error_message = data['resultData']['error'].get('message')
|
|
|
|
return ExecutionEvent(
|
|
execution_id=execution_id,
|
|
workflow_id=workflow_id,
|
|
status=status,
|
|
timestamp=datetime.now(),
|
|
duration=duration,
|
|
error_message=error_message,
|
|
node_data=execution.get('data')
|
|
)
|
|
|
|
def _handle_execution_event(self, event: ExecutionEvent):
|
|
"""Handle execution event and trigger callbacks"""
|
|
self.logger.info(f"Execution {event.execution_id} status: {event.status.value}")
|
|
|
|
# Trigger appropriate callbacks
|
|
if event.status == ExecutionStatus.SUCCESS:
|
|
for callback in self.callbacks['on_success']:
|
|
try:
|
|
callback(event)
|
|
except Exception as e:
|
|
self.logger.error(f"Error in success callback: {e}")
|
|
|
|
elif event.status == ExecutionStatus.ERROR:
|
|
for callback in self.callbacks['on_error']:
|
|
try:
|
|
callback(event)
|
|
except Exception as e:
|
|
self.logger.error(f"Error in error callback: {e}")
|
|
|
|
elif event.status == ExecutionStatus.RUNNING:
|
|
for callback in self.callbacks['on_start']:
|
|
try:
|
|
callback(event)
|
|
except Exception as e:
|
|
self.logger.error(f"Error in start callback: {e}")
|
|
|
|
# Always trigger complete callbacks for finished executions
|
|
if event.status in [ExecutionStatus.SUCCESS, ExecutionStatus.ERROR, ExecutionStatus.CANCELLED]:
|
|
for callback in self.callbacks['on_complete']:
|
|
try:
|
|
callback(event)
|
|
except Exception as e:
|
|
self.logger.error(f"Error in complete callback: {e}")
|
|
|
|
def execute_and_monitor(self, workflow_id: str, test_data: Optional[Dict] = None, timeout: int = 300) -> ExecutionEvent:
|
|
"""Execute workflow and monitor until completion"""
|
|
try:
|
|
# Start execution
|
|
result = self.client.execute_workflow(workflow_id, test_data)
|
|
execution_id = result.get('id')
|
|
|
|
if not execution_id:
|
|
raise Exception("Failed to get execution ID from workflow execution")
|
|
|
|
self.logger.info(f"Started execution {execution_id} for workflow {workflow_id}")
|
|
|
|
# Monitor execution until completion
|
|
start_time = time.time()
|
|
while time.time() - start_time < timeout:
|
|
execution = self.client.get_execution(execution_id)
|
|
status = execution.get('status')
|
|
|
|
if status in ['success', 'error', 'cancelled']:
|
|
event = self._create_execution_event(execution)
|
|
self.logger.info(f"Execution {execution_id} completed with status: {status}")
|
|
return event
|
|
|
|
time.sleep(2) # Check every 2 seconds
|
|
|
|
# Timeout reached
|
|
raise TimeoutError(f"Execution {execution_id} did not complete within {timeout} seconds")
|
|
|
|
except Exception as e:
|
|
self.logger.error(f"Error executing and monitoring workflow: {e}")
|
|
raise
|
|
|
|
def get_execution_summary(self, hours: int = 24) -> Dict:
|
|
"""Get execution summary for the specified time period"""
|
|
try:
|
|
# Get executions from the specified time period
|
|
executions = self.client.get_executions(limit=200)
|
|
|
|
# Filter by time period
|
|
cutoff_time = datetime.now() - timedelta(hours=hours)
|
|
recent_executions = []
|
|
|
|
for execution in executions:
|
|
started_at = execution.get('startedAt')
|
|
if started_at:
|
|
exec_time = datetime.fromisoformat(started_at.replace('Z', '+00:00'))
|
|
if exec_time > cutoff_time:
|
|
recent_executions.append(execution)
|
|
|
|
# Calculate summary statistics
|
|
total = len(recent_executions)
|
|
successful = len([e for e in recent_executions if e.get('status') == 'success'])
|
|
failed = len([e for e in recent_executions if e.get('status') == 'error'])
|
|
running = len([e for e in recent_executions if e.get('status') == 'running'])
|
|
|
|
# Calculate average duration
|
|
durations = []
|
|
for execution in recent_executions:
|
|
start_time = execution.get('startedAt')
|
|
finish_time = execution.get('finishedAt')
|
|
if start_time and finish_time:
|
|
start_dt = datetime.fromisoformat(start_time.replace('Z', '+00:00'))
|
|
finish_dt = datetime.fromisoformat(finish_time.replace('Z', '+00:00'))
|
|
durations.append((finish_dt - start_dt).total_seconds())
|
|
|
|
avg_duration = sum(durations) / len(durations) if durations else 0
|
|
|
|
# Group by workflow
|
|
workflow_stats = {}
|
|
for execution in recent_executions:
|
|
workflow_id = execution.get('workflowId')
|
|
if workflow_id not in workflow_stats:
|
|
workflow_stats[workflow_id] = {'total': 0, 'success': 0, 'error': 0}
|
|
|
|
workflow_stats[workflow_id]['total'] += 1
|
|
status = execution.get('status')
|
|
if status == 'success':
|
|
workflow_stats[workflow_id]['success'] += 1
|
|
elif status == 'error':
|
|
workflow_stats[workflow_id]['error'] += 1
|
|
|
|
return {
|
|
'time_period_hours': hours,
|
|
'total_executions': total,
|
|
'successful_executions': successful,
|
|
'failed_executions': failed,
|
|
'running_executions': running,
|
|
'success_rate': (successful / total * 100) if total > 0 else 0,
|
|
'average_duration_seconds': avg_duration,
|
|
'workflow_statistics': workflow_stats,
|
|
'executions': recent_executions
|
|
}
|
|
|
|
except Exception as e:
|
|
self.logger.error(f"Error generating execution summary: {e}")
|
|
raise
|
|
|
|
def wait_for_execution_completion(self, execution_id: str, timeout: int = 300) -> ExecutionEvent:
|
|
"""Wait for a specific execution to complete"""
|
|
start_time = time.time()
|
|
|
|
while time.time() - start_time < timeout:
|
|
execution = self.client.get_execution(execution_id)
|
|
status = execution.get('status')
|
|
|
|
if status in ['success', 'error', 'cancelled']:
|
|
return self._create_execution_event(execution)
|
|
|
|
time.sleep(2)
|
|
|
|
raise TimeoutError(f"Execution {execution_id} did not complete within {timeout} seconds")
|
|
|
|
    def cancel_execution(self, execution_id: str) -> bool:
        """Cancel a running execution.

        NOTE(review): this is a placeholder — no API call is made; it
        only logs a warning and optimistically returns True. Returns
        False only if that logging itself raises.
        """
        try:
            # N8N API might have a cancel endpoint - this would need to be implemented
            # based on the actual API capabilities
            self.logger.warning(f"Cancel execution {execution_id} - implement based on N8N API")
            return True
        except Exception as e:
            self.logger.error(f"Error cancelling execution {execution_id}: {e}")
            return False
|
def get_execution_logs(self, execution_id: str) -> Dict:
|
|
"""Get detailed logs for an execution"""
|
|
try:
|
|
execution = self.client.get_execution(execution_id)
|
|
|
|
logs = {
|
|
'execution_id': execution_id,
|
|
'status': execution.get('status'),
|
|
'workflow_id': execution.get('workflowId'),
|
|
'started_at': execution.get('startedAt'),
|
|
'finished_at': execution.get('finishedAt'),
|
|
'node_logs': [],
|
|
'errors': []
|
|
}
|
|
|
|
# Extract node-level logs if available
|
|
data = execution.get('data', {})
|
|
if 'resultData' in data:
|
|
result_data = data['resultData']
|
|
|
|
# Extract errors
|
|
if 'error' in result_data:
|
|
logs['errors'].append(result_data['error'])
|
|
|
|
# Extract node execution data
|
|
if 'runData' in result_data:
|
|
for node_name, node_runs in result_data['runData'].items():
|
|
for run in node_runs:
|
|
logs['node_logs'].append({
|
|
'node': node_name,
|
|
'start_time': run.get('startTime'),
|
|
'execution_time': run.get('executionTime'),
|
|
'data': run.get('data', {}),
|
|
'error': run.get('error')
|
|
})
|
|
|
|
return logs
|
|
|
|
except Exception as e:
|
|
self.logger.error(f"Error getting execution logs: {e}")
|
|
raise
|
|
|
|
|
|
# Utility functions for common monitoring scenarios
|
|
|
|
def create_simple_monitor(n8n_client) -> ExecutionMonitor:
    """Create a simple execution monitor with basic console logging.

    Attaches print-based callbacks for start, success and error events.

    Args:
        n8n_client: client passed through to ExecutionMonitor.

    Returns:
        A configured (but not yet started) ExecutionMonitor.
    """
    monitor = ExecutionMonitor(n8n_client)

    def log_success(event: ExecutionEvent):
        # BUG FIX: duration is None when the API omits startedAt/finishedAt
        # (the dataclass default); the original f-string
        # {event.duration:.2f} raised TypeError in that case.
        if event.duration is not None:
            elapsed = f"{event.duration:.2f}s"
        else:
            elapsed = "unknown time"
        print(f"✅ Execution {event.execution_id} completed successfully in {elapsed}")

    def log_error(event: ExecutionEvent):
        print(f"❌ Execution {event.execution_id} failed: {event.error_message}")

    def log_start(event: ExecutionEvent):
        print(f"🚀 Execution {event.execution_id} started")

    monitor.add_callback('on_success', log_success)
    monitor.add_callback('on_error', log_error)
    monitor.add_callback('on_start', log_start)

    return monitor
|
# Smoke-test entry point: confirms the module imports cleanly; real use
# requires constructing ExecutionMonitor with an N8N client.
if __name__ == "__main__":
    print("Execution Monitor initialized successfully.")