tkb_timeshift/claude_n8n/tools/execution_monitor.py

#!/usr/bin/env python3
"""
Execution Monitor - Tools for monitoring N8N workflow executions.
Provides real-time monitoring, alerting, and execution management.
"""
import logging
import time
from dataclasses import dataclass
from datetime import datetime, timedelta, timezone
from enum import Enum
from threading import Thread, Event
from typing import Callable, Dict, List, Optional


class ExecutionStatus(Enum):
    """Execution status enumeration"""
    RUNNING = "running"
    SUCCESS = "success"
    ERROR = "error"
    CANCELLED = "cancelled"
    WAITING = "waiting"


@dataclass
class ExecutionEvent:
    """Represents an execution event"""
    execution_id: str
    workflow_id: str
    status: ExecutionStatus
    timestamp: datetime
    duration: Optional[float] = None
    error_message: Optional[str] = None
    node_data: Optional[Dict] = None


class ExecutionMonitor:
    """Monitors N8N workflow executions and provides real-time insights"""

    def __init__(self, n8n_client, poll_interval: int = 5):
        """Initialize execution monitor"""
        self.client = n8n_client
        self.poll_interval = poll_interval
        self.monitoring = False
        self.stop_event = Event()
        self.callbacks = {
            'on_success': [],
            'on_error': [],
            'on_start': [],
            'on_complete': [],
        }
        self.tracked_executions = {}
        self.logger = self._setup_logger()

    def _setup_logger(self) -> logging.Logger:
        """Set up logging for the monitor"""
        logger = logging.getLogger('N8NExecutionMonitor')
        logger.setLevel(logging.INFO)
        if not logger.handlers:
            handler = logging.StreamHandler()
            formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s')
            handler.setFormatter(formatter)
            logger.addHandler(handler)
        return logger

    def add_callback(self, event_type: str, callback: Callable[[ExecutionEvent], None]):
        """Add callback for execution events"""
        if event_type in self.callbacks:
            self.callbacks[event_type].append(callback)
        else:
            raise ValueError(f"Invalid event type: {event_type}")
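
    # Example (sketch, not executed here): registering a failure handler.
    # `monitor` is an ExecutionMonitor instance and `alert_on_failure` is a
    # hypothetical handler, not part of this module:
    #
    #   def alert_on_failure(event: ExecutionEvent) -> None:
    #       print(f"workflow {event.workflow_id} failed: {event.error_message}")
    #
    #   monitor.add_callback('on_error', alert_on_failure)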

    def start_monitoring(self, workflow_ids: Optional[List[str]] = None):
        """Start monitoring workflow executions"""
        if self.monitoring:
            self.logger.warning("Monitoring already running")
            return
        self.monitoring = True
        self.stop_event.clear()
        monitor_thread = Thread(target=self._monitor_loop, args=(workflow_ids,))
        monitor_thread.daemon = True
        monitor_thread.start()
        self.logger.info("Execution monitoring started")

    def stop_monitoring(self):
        """Stop monitoring workflow executions"""
        if not self.monitoring:
            return
        self.monitoring = False
        self.stop_event.set()
        self.logger.info("Execution monitoring stopped")

    def _monitor_loop(self, workflow_ids: Optional[List[str]]):
        """Main monitoring loop"""
        while self.monitoring and not self.stop_event.is_set():
            try:
                self._check_executions(workflow_ids)
            except Exception as e:
                self.logger.error(f"Error in monitoring loop: {e}")
            # Wait on the stop event instead of sleeping so stop_monitoring()
            # takes effect immediately rather than after the full interval
            self.stop_event.wait(self.poll_interval)

    def _check_executions(self, workflow_ids: Optional[List[str]]):
        """Check for new or updated executions"""
        try:
            # Get recent executions
            executions = self.client.get_executions(limit=50)
            for execution in executions:
                execution_id = execution.get('id')
                workflow_id = execution.get('workflowId')
                # Filter by workflow IDs if specified
                if workflow_ids and workflow_id not in workflow_ids:
                    continue
                # Check if this is a new or updated execution
                if self._is_execution_updated(execution):
                    event = self._create_execution_event(execution)
                    self._handle_execution_event(event)
                # Update tracked executions
                self.tracked_executions[execution_id] = {
                    'status': execution.get('status'),
                    'last_updated': datetime.now(),
                }
        except Exception as e:
            self.logger.error(f"Error checking executions: {e}")

    def _is_execution_updated(self, execution: Dict) -> bool:
        """Check if execution is new or has been updated"""
        execution_id = execution.get('id')
        current_status = execution.get('status')
        if execution_id not in self.tracked_executions:
            return True
        tracked_status = self.tracked_executions[execution_id]['status']
        return current_status != tracked_status

    def _create_execution_event(self, execution: Dict) -> ExecutionEvent:
        """Create ExecutionEvent from execution data"""
        execution_id = execution.get('id')
        workflow_id = execution.get('workflowId')
        status = ExecutionStatus(execution.get('status'))
        # Calculate duration if both timestamps are available
        duration = None
        start_time = execution.get('startedAt')
        finish_time = execution.get('finishedAt')
        if start_time and finish_time:
            start_dt = datetime.fromisoformat(start_time.replace('Z', '+00:00'))
            finish_dt = datetime.fromisoformat(finish_time.replace('Z', '+00:00'))
            duration = (finish_dt - start_dt).total_seconds()
        # Extract error message if available
        error_message = None
        if status == ExecutionStatus.ERROR:
            data = execution.get('data', {})
            if 'resultData' in data and 'error' in data['resultData']:
                error_message = data['resultData']['error'].get('message')
        return ExecutionEvent(
            execution_id=execution_id,
            workflow_id=workflow_id,
            status=status,
            timestamp=datetime.now(),
            duration=duration,
            error_message=error_message,
            node_data=execution.get('data'),
        )

    def _handle_execution_event(self, event: ExecutionEvent):
        """Handle execution event and trigger callbacks"""
        self.logger.info(f"Execution {event.execution_id} status: {event.status.value}")
        # Map each status to its dedicated callback list
        status_to_event = {
            ExecutionStatus.SUCCESS: 'on_success',
            ExecutionStatus.ERROR: 'on_error',
            ExecutionStatus.RUNNING: 'on_start',
        }
        event_types = []
        if event.status in status_to_event:
            event_types.append(status_to_event[event.status])
        # Finished executions always trigger the completion callbacks as well
        if event.status in (ExecutionStatus.SUCCESS, ExecutionStatus.ERROR, ExecutionStatus.CANCELLED):
            event_types.append('on_complete')
        for event_type in event_types:
            for callback in self.callbacks[event_type]:
                try:
                    callback(event)
                except Exception as e:
                    self.logger.error(f"Error in {event_type} callback: {e}")

    def execute_and_monitor(self, workflow_id: str, test_data: Optional[Dict] = None, timeout: int = 300) -> ExecutionEvent:
        """Execute workflow and monitor until completion"""
        try:
            # Start execution
            result = self.client.execute_workflow(workflow_id, test_data)
            execution_id = result.get('id')
            if not execution_id:
                raise RuntimeError("Failed to get execution ID from workflow execution")
            self.logger.info(f"Started execution {execution_id} for workflow {workflow_id}")
            # Poll until the execution reaches a terminal state (or times out)
            event = self.wait_for_execution_completion(execution_id, timeout)
            self.logger.info(f"Execution {execution_id} completed with status: {event.status.value}")
            return event
        except Exception as e:
            self.logger.error(f"Error executing and monitoring workflow: {e}")
            raise
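
    # Example (sketch): running a workflow synchronously and reacting to the
    # result. `monitor` and the workflow ID are placeholders:
    #
    #   event = monitor.execute_and_monitor('my-workflow-id', timeout=120)
    #   if event.status is ExecutionStatus.ERROR:
    #       print(f"run failed after {event.duration}s: {event.error_message}")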

    def get_execution_summary(self, hours: int = 24) -> Dict:
        """Get execution summary for the specified time period"""
        try:
            # Inspect the most recent executions only; anything beyond this
            # limit is not included in the summary
            executions = self.client.get_executions(limit=200)
            # Filter by time period; API timestamps are UTC, so compare
            # against a timezone-aware UTC cutoff
            cutoff_time = datetime.now(timezone.utc) - timedelta(hours=hours)
            recent_executions = []
            for execution in executions:
                started_at = execution.get('startedAt')
                if started_at:
                    exec_time = datetime.fromisoformat(started_at.replace('Z', '+00:00'))
                    if exec_time > cutoff_time:
                        recent_executions.append(execution)
            # Calculate summary statistics
            total = len(recent_executions)
            successful = len([e for e in recent_executions if e.get('status') == 'success'])
            failed = len([e for e in recent_executions if e.get('status') == 'error'])
            running = len([e for e in recent_executions if e.get('status') == 'running'])
            # Calculate average duration
            durations = []
            for execution in recent_executions:
                start_time = execution.get('startedAt')
                finish_time = execution.get('finishedAt')
                if start_time and finish_time:
                    start_dt = datetime.fromisoformat(start_time.replace('Z', '+00:00'))
                    finish_dt = datetime.fromisoformat(finish_time.replace('Z', '+00:00'))
                    durations.append((finish_dt - start_dt).total_seconds())
            avg_duration = sum(durations) / len(durations) if durations else 0
            # Group by workflow
            workflow_stats = {}
            for execution in recent_executions:
                workflow_id = execution.get('workflowId')
                if workflow_id not in workflow_stats:
                    workflow_stats[workflow_id] = {'total': 0, 'success': 0, 'error': 0}
                workflow_stats[workflow_id]['total'] += 1
                status = execution.get('status')
                if status == 'success':
                    workflow_stats[workflow_id]['success'] += 1
                elif status == 'error':
                    workflow_stats[workflow_id]['error'] += 1
            return {
                'time_period_hours': hours,
                'total_executions': total,
                'successful_executions': successful,
                'failed_executions': failed,
                'running_executions': running,
                'success_rate': (successful / total * 100) if total > 0 else 0,
                'average_duration_seconds': avg_duration,
                'workflow_statistics': workflow_stats,
                'executions': recent_executions,
            }
        except Exception as e:
            self.logger.error(f"Error generating execution summary: {e}")
            raise
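
    # Example (sketch): reading the headline numbers from a summary.
    # `monitor` is a placeholder ExecutionMonitor instance:
    #
    #   summary = monitor.get_execution_summary(hours=6)
    #   print(f"{summary['total_executions']} runs, "
    #         f"{summary['success_rate']:.1f}% success, "
    #         f"avg {summary['average_duration_seconds']:.1f}s")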

    def wait_for_execution_completion(self, execution_id: str, timeout: int = 300) -> ExecutionEvent:
        """Wait for a specific execution to complete"""
        start_time = time.time()
        while time.time() - start_time < timeout:
            execution = self.client.get_execution(execution_id)
            status = execution.get('status')
            if status in ['success', 'error', 'cancelled']:
                return self._create_execution_event(execution)
            time.sleep(2)  # Check every 2 seconds
        raise TimeoutError(f"Execution {execution_id} did not complete within {timeout} seconds")

    def cancel_execution(self, execution_id: str) -> bool:
        """Cancel a running execution (not yet implemented)"""
        try:
            # The N8N API may expose a cancel endpoint; wire it up here based
            # on the actual API capabilities. Until then, report failure
            # rather than pretending the cancellation succeeded.
            self.logger.warning(f"Cancel execution {execution_id} - not implemented for this N8N API")
            return False
        except Exception as e:
            self.logger.error(f"Error cancelling execution {execution_id}: {e}")
            return False

    def get_execution_logs(self, execution_id: str) -> Dict:
        """Get detailed logs for an execution"""
        try:
            execution = self.client.get_execution(execution_id)
            logs = {
                'execution_id': execution_id,
                'status': execution.get('status'),
                'workflow_id': execution.get('workflowId'),
                'started_at': execution.get('startedAt'),
                'finished_at': execution.get('finishedAt'),
                'node_logs': [],
                'errors': [],
            }
            # Extract node-level logs if available
            data = execution.get('data', {})
            if 'resultData' in data:
                result_data = data['resultData']
                # Extract errors
                if 'error' in result_data:
                    logs['errors'].append(result_data['error'])
                # Extract node execution data
                if 'runData' in result_data:
                    for node_name, node_runs in result_data['runData'].items():
                        for run in node_runs:
                            logs['node_logs'].append({
                                'node': node_name,
                                'start_time': run.get('startTime'),
                                'execution_time': run.get('executionTime'),
                                'data': run.get('data', {}),
                                'error': run.get('error'),
                            })
            return logs
        except Exception as e:
            self.logger.error(f"Error getting execution logs: {e}")
            raise
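
    # Example (sketch): surfacing per-node results from the logs.
    # `monitor` and the execution ID are placeholders:
    #
    #   logs = monitor.get_execution_logs('12345')
    #   for entry in logs['node_logs']:
    #       print(entry['node'], entry['execution_time'], entry['error'])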


# Utility functions for common monitoring scenarios

def create_simple_monitor(n8n_client) -> ExecutionMonitor:
    """Create a simple execution monitor with basic logging"""
    monitor = ExecutionMonitor(n8n_client)

    def log_success(event: ExecutionEvent):
        # duration can be None when timestamps are missing, so guard the format
        duration = f" in {event.duration:.2f}s" if event.duration is not None else ""
        print(f"✅ Execution {event.execution_id} completed successfully{duration}")

    def log_error(event: ExecutionEvent):
        print(f"❌ Execution {event.execution_id} failed: {event.error_message}")

    def log_start(event: ExecutionEvent):
        print(f"🚀 Execution {event.execution_id} started")

    monitor.add_callback('on_success', log_success)
    monitor.add_callback('on_error', log_error)
    monitor.add_callback('on_start', log_start)
    return monitor


if __name__ == "__main__":
    print("Execution Monitor initialized successfully.")
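
    # Usage sketch (commented out because it needs a live N8N instance and a
    # client object exposing get_executions/get_execution/execute_workflow;
    # `N8NClient` below is a hypothetical import, not part of this module):
    #
    #   from n8n_client import N8NClient
    #   client = N8NClient(base_url="http://localhost:5678", api_key="...")
    #   monitor = create_simple_monitor(client)
    #   monitor.start_monitoring()
    #   try:
    #       time.sleep(60)  # observe executions for a minute
    #   finally:
    #       monitor.stop_monitoring()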