#!/usr/bin/env python3
"""
Workflow Analyzer - Tools for analyzing N8N workflows and execution results

Provides debugging, error analysis, and performance insights.
"""

import json
import re
from collections import defaultdict
from dataclasses import dataclass
from datetime import datetime
from typing import Dict, List


@dataclass
class AnalysisResult:
    """Result of a workflow analysis."""
    workflow_id: str
    issues: List[Dict]
    suggestions: List[str]
    performance_metrics: Dict
    error_patterns: List[Dict]
    health_score: float


class WorkflowAnalyzer:
    """Analyzes N8N workflows for issues, performance, and optimization opportunities."""

    def __init__(self):
        self.common_issues = {
            'missing_error_handling': 'Node lacks error handling configuration',
            'hardcoded_credentials': 'Credentials are hardcoded instead of using the credential store',
            'inefficient_loops': 'Loop structure may cause performance issues',
            'missing_validation': 'Input validation is missing or insufficient',
            'timeout_issues': 'Request timeout settings may be too aggressive',
            'rate_limit_violations': 'API rate limits are not properly handled',
        }

    def analyze_workflow_structure(self, workflow: Dict) -> Dict:
        """Analyze workflow structure for common issues."""
        issues = []
        suggestions = []

        nodes = workflow.get('nodes', [])
        connections = workflow.get('connections', {})

        # Check for common structural issues
        issues.extend(self._check_error_handling(nodes))
        issues.extend(self._check_credential_usage(nodes))
        issues.extend(self._check_node_configurations(nodes))
        issues.extend(self._check_workflow_complexity(nodes, connections))

        # Generate suggestions based on the issues found
        suggestions.extend(self._generate_suggestions(issues))

        # connections[source]['main'] is a list of output slots, each holding a
        # list of target descriptors; count the individual links
        connection_count = sum(
            len(targets)
            for conns in connections.values()
            for targets in conns.get('main', [])
        )

        return {
            'issues': issues,
            'suggestions': suggestions,
            'node_count': len(nodes),
            'connection_count': connection_count,
            'complexity_score': self._calculate_complexity_score(nodes, connections),
        }

    def analyze_execution_logs(self, execution_data: Dict) -> Dict:
        """Analyze execution logs for errors and performance issues."""
        execution_id = execution_data.get('id')
        status = execution_data.get('status')
        data = execution_data.get('data', {})

        analysis = {
            'execution_id': execution_id,
            'status': status,
            'errors': [],
            'warnings': [],
            'performance_issues': [],
            'node_timings': {},
            'total_duration': 0,
        }

        if status == 'error':
            analysis['errors'] = self._extract_errors(data)

        # Analyze per-node performance
        if 'resultData' in data:
            analysis['node_timings'] = self._analyze_node_timings(data['resultData'])
            analysis['performance_issues'] = self._identify_performance_issues(analysis['node_timings'])

        # Calculate total execution time from the start/finish timestamps
        start_time = execution_data.get('startedAt')
        finish_time = execution_data.get('finishedAt')
        if start_time and finish_time:
            # N8N timestamps carry a trailing 'Z'; fromisoformat() needs an explicit offset
            start_dt = datetime.fromisoformat(start_time.replace('Z', '+00:00'))
            finish_dt = datetime.fromisoformat(finish_time.replace('Z', '+00:00'))
            analysis['total_duration'] = (finish_dt - start_dt).total_seconds()

        return analysis
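
    # NOTE: the execution payload shape assumed above is roughly what the N8N
    # REST API returns for an execution. The exact structure varies between
    # N8N versions, so treat this as the shape this module expects rather than
    # a guarantee; field values below are illustrative only:
    #
    #   {
    #       'id': '1234',
    #       'status': 'error',
    #       'startedAt': '2024-01-01T12:00:00.000Z',
    #       'finishedAt': '2024-01-01T12:00:05.000Z',
    #       'data': {'resultData': {'runData': {...}, 'error': {...}}}
    #   }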

    def find_error_patterns(self, executions: List[Dict]) -> List[Dict]:
        """Identify recurring error patterns across multiple executions."""
        error_patterns = defaultdict(int)
        error_details = defaultdict(list)

        for execution in executions:
            if execution.get('status') == 'error':
                errors = self._extract_errors(execution.get('data', {}))
                for error in errors:
                    error_type = self._categorize_error(error)
                    error_patterns[error_type] += 1
                    error_details[error_type].append({
                        'execution_id': execution.get('id'),
                        'timestamp': execution.get('startedAt'),
                        'error': error,
                    })

        patterns = []
        for pattern, count in error_patterns.items():
            patterns.append({
                'pattern': pattern,
                'frequency': count,
                # Percentage is relative to all executions, not only the failed ones
                'percentage': (count / len(executions)) * 100,
                'examples': error_details[pattern][:3],  # First 3 examples
            })

        return sorted(patterns, key=lambda x: x['frequency'], reverse=True)

    def generate_health_report(self, workflow: Dict, executions: List[Dict]) -> AnalysisResult:
        """Generate a comprehensive health report for a workflow."""
        workflow_id = workflow.get('id')

        # Analyze workflow structure
        structure_analysis = self.analyze_workflow_structure(workflow)

        # Analyze the ten most recent executions
        execution_analyses = [self.analyze_execution_logs(execution) for execution in executions[-10:]]
        error_patterns = self.find_error_patterns(executions)

        # Calculate performance metrics
        performance_metrics = self._calculate_performance_metrics(execution_analyses)

        # Calculate the overall health score
        health_score = self._calculate_health_score(structure_analysis, execution_analyses, error_patterns)

        # Combine all issues and suggestions
        all_issues = structure_analysis['issues']
        all_suggestions = structure_analysis['suggestions']

        # Add execution-based suggestions
        if error_patterns:
            all_suggestions.extend(self._suggest_error_fixes(error_patterns))

        return AnalysisResult(
            workflow_id=workflow_id,
            issues=all_issues,
            suggestions=all_suggestions,
            performance_metrics=performance_metrics,
            error_patterns=error_patterns,
            health_score=health_score,
        )

    def _check_error_handling(self, nodes: List[Dict]) -> List[Dict]:
        """Check for missing error handling in nodes."""
        issues = []
        for node in nodes:
            node_type = node.get('type', '')
            if node_type in ('n8n-nodes-base.httpRequest', 'n8n-nodes-base.webhook'):
                # Check whether error handling is configured on the node
                parameters = node.get('parameters', {})
                if not parameters.get('continueOnFail') and not parameters.get('errorHandling'):
                    issues.append({
                        'type': 'missing_error_handling',
                        'node': node.get('name'),
                        'severity': 'medium',
                        'description': f"Node '{node.get('name')}' lacks error handling configuration",
                    })
        return issues

    def _check_credential_usage(self, nodes: List[Dict]) -> List[Dict]:
        """Check for hardcoded credentials."""
        issues = []
        for node in nodes:
            parameters = node.get('parameters', {})
            param_str = json.dumps(parameters)

            # Heuristic patterns for potential hardcoded credentials; these can
            # produce false positives on parameter names that merely mention a
            # key or token, so flagged nodes deserve a manual review
            suspicious_patterns = [
                r'password.*["\'].*["\']',
                r'token.*["\'].*["\']',
                r'key.*["\'].*["\']',
                r'secret.*["\'].*["\']',
            ]
            for pattern in suspicious_patterns:
                if re.search(pattern, param_str, re.IGNORECASE):
                    issues.append({
                        'type': 'hardcoded_credentials',
                        'node': node.get('name'),
                        'severity': 'high',
                        'description': f"Node '{node.get('name')}' may contain hardcoded credentials",
                    })
                    break
        return issues

    def _check_node_configurations(self, nodes: List[Dict]) -> List[Dict]:
        """Check node configurations for common issues."""
        issues = []
        for node in nodes:
            node_type = node.get('type', '')
            parameters = node.get('parameters', {})

            # Check HTTP request timeouts
            if node_type == 'n8n-nodes-base.httpRequest':
                timeout = parameters.get('timeout', 300)  # Default: 5 minutes
                if timeout < 30:
                    issues.append({
                        'type': 'timeout_issues',
                        'node': node.get('name'),
                        'severity': 'low',
                        'description': f"HTTP timeout ({timeout}s) may be too aggressive",
                    })

            # Check for missing required parameters
            if not parameters:
                issues.append({
                    'type': 'missing_validation',
                    'node': node.get('name'),
                    'severity': 'medium',
                    'description': f"Node '{node.get('name')}' has no parameters configured",
                })
        return issues
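
    # NOTE: the traversal in the method below assumes the standard N8N
    # connections layout: keyed by source node name, with 'main' holding a
    # list of output slots, each a list of target descriptors. The node names
    # in this sketch are illustrative:
    #
    #   connections = {
    #       'Webhook': {
    #           'main': [[{'node': 'Fetch Data', 'type': 'main', 'index': 0}]]
    #       }
    #   }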

    def _check_workflow_complexity(self, nodes: List[Dict], connections: Dict) -> List[Dict]:
        """Check workflow complexity and structure."""
        issues = []

        # Flag overly complex workflows (more than 20 nodes)
        if len(nodes) > 20:
            issues.append({
                'type': 'workflow_complexity',
                'severity': 'medium',
                'description': f"Workflow has {len(nodes)} nodes, consider breaking it into smaller workflows",
            })

        # Check for disconnected nodes
        connected_nodes = set()
        for source, targets in connections.items():
            connected_nodes.add(source)
            for target_list in targets.get('main', []):
                for target in target_list:
                    connected_nodes.add(target.get('node'))

        all_nodes = {node.get('name') for node in nodes}
        disconnected = all_nodes - connected_nodes
        if disconnected:
            issues.append({
                'type': 'disconnected_nodes',
                'severity': 'high',
                'description': f"Disconnected nodes found: {', '.join(disconnected)}",
            })

        return issues

    def _extract_errors(self, execution_data: Dict) -> List[Dict]:
        """Extract error information from execution data."""
        errors = []
        if 'resultData' in execution_data:
            result_data = execution_data['resultData']
            if 'error' in result_data:
                error_info = result_data['error']
                errors.append({
                    'message': error_info.get('message', ''),
                    'stack': error_info.get('stack', ''),
                    'type': error_info.get('name', 'Unknown'),
                    'node': error_info.get('node', 'Unknown'),
                })
        return errors

    def _categorize_error(self, error: Dict) -> str:
        """Categorize an error by matching keywords in its message."""
        message = error.get('message', '').lower()

        if 'timeout' in message:
            return 'timeout_error'
        elif 'connection' in message or 'network' in message:
            return 'connection_error'
        elif 'authentication' in message or 'unauthorized' in message:
            return 'auth_error'
        elif 'rate limit' in message or '429' in message:
            return 'rate_limit_error'
        elif 'validation' in message or 'invalid' in message:
            return 'validation_error'
        else:
            return 'generic_error'

    def _analyze_node_timings(self, result_data: Dict) -> Dict:
        """Analyze timing data for each node."""
        timings = {}

        # Extract timing information from the result data. This may need to be
        # adapted to the actual N8N execution data structure in use.
        run_data = result_data.get('runData', {})
        for node_name, node_data in run_data.items():
            if isinstance(node_data, list) and node_data:
                node_execution = node_data[0]
                start_time = node_execution.get('startTime')
                execution_time = node_execution.get('executionTime')
                if start_time and execution_time:
                    timings[node_name] = {
                        'start_time': start_time,
                        'execution_time': execution_time,
                        'data_count': len(node_execution.get('data', {}).get('main', [])),
                    }
        return timings

    def _identify_performance_issues(self, node_timings: Dict) -> List[Dict]:
        """Identify performance issues from node timing data."""
        issues = []
        for node_name, timing in node_timings.items():
            execution_time = timing.get('execution_time', 0)

            # Flag nodes taking longer than 30 seconds (executionTime is in milliseconds)
            if execution_time > 30000:
                issues.append({
                    'type': 'slow_node',
                    'node': node_name,
                    'execution_time': execution_time,
                    'description': f"Node '{node_name}' took {execution_time / 1000:.2f}s to execute",
                })
        return issues
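
    # Worked example (hypothetical numbers): given analyses with durations
    # [2.0, 4.0] seconds and one 'error' status out of two executions, the
    # method below yields avg_duration=3.0, max_duration=4.0, min_duration=2.0,
    # error_rate=50.0, and total_executions=2.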

    def _calculate_performance_metrics(self, execution_analyses: List[Dict]) -> Dict:
        """Calculate performance metrics from execution analyses."""
        if not execution_analyses:
            return {}

        durations = [analysis['total_duration'] for analysis in execution_analyses
                     if analysis['total_duration'] > 0]
        error_count = len([analysis for analysis in execution_analyses
                           if analysis['status'] == 'error'])

        return {
            'avg_duration': sum(durations) / len(durations) if durations else 0,
            'max_duration': max(durations) if durations else 0,
            'min_duration': min(durations) if durations else 0,
            'error_rate': (error_count / len(execution_analyses)) * 100,
            'total_executions': len(execution_analyses),
        }

    def _calculate_complexity_score(self, nodes: List[Dict], connections: Dict) -> float:
        """Calculate a workflow complexity score (0-100)."""
        node_count = len(nodes)
        # Count individual links across all output slots, matching the counting
        # used in analyze_workflow_structure()
        connection_count = sum(
            len(targets)
            for conns in connections.values()
            for targets in conns.get('main', [])
        )

        # Simple complexity heuristic: nodes weigh twice as much as connections
        complexity = (node_count * 2) + connection_count

        # Normalize to a 0-100 scale
        return min(complexity / 2, 100)

    def _calculate_health_score(self, structure_analysis: Dict,
                                execution_analyses: List[Dict],
                                error_patterns: List[Dict]) -> float:
        """Calculate an overall workflow health score (0-100)."""
        score = 100.0

        # Deduct points for structural issues
        high_severity_issues = len([issue for issue in structure_analysis['issues']
                                    if issue.get('severity') == 'high'])
        medium_severity_issues = len([issue for issue in structure_analysis['issues']
                                      if issue.get('severity') == 'medium'])
        score -= high_severity_issues * 20
        score -= medium_severity_issues * 10

        # Deduct points for execution errors
        if execution_analyses:
            error_rate = len([analysis for analysis in execution_analyses
                              if analysis['status'] == 'error']) / len(execution_analyses)
            score -= error_rate * 50

        # Deduct points for recurring error patterns (capped per pattern)
        for pattern in error_patterns:
            if pattern['frequency'] > 1:
                score -= min(pattern['frequency'] * 5, 30)

        return max(score, 0)

    def _generate_suggestions(self, issues: List[Dict]) -> List[str]:
        """Generate improvement suggestions based on issues."""
        suggestions = []
        for issue in issues:
            issue_type = issue.get('type')
            if issue_type == 'missing_error_handling':
                suggestions.append("Add error handling to HTTP and webhook nodes using the 'Continue on Fail' option")
            elif issue_type == 'hardcoded_credentials':
                suggestions.append("Move credentials to the N8N credential store for better security")
            elif issue_type == 'timeout_issues':
                suggestions.append("Review and adjust timeout settings based on expected response times")
            elif issue_type == 'workflow_complexity':
                suggestions.append("Consider breaking the complex workflow into smaller, manageable sub-workflows")
            elif issue_type == 'disconnected_nodes':
                suggestions.append("Remove unused nodes or connect them to the workflow")

        # Remove duplicates while preserving insertion order
        return list(dict.fromkeys(suggestions))

    def _suggest_error_fixes(self, error_patterns: List[Dict]) -> List[str]:
        """Suggest fixes for common error patterns."""
        suggestions = []
        for pattern in error_patterns:
            pattern_type = pattern['pattern']
            if pattern_type == 'timeout_error':
                suggestions.append("Increase timeout settings or implement retry logic for timeout-prone operations")
            elif pattern_type == 'connection_error':
                suggestions.append("Add connection retry logic and check network connectivity")
            elif pattern_type == 'auth_error':
                suggestions.append("Verify and refresh authentication credentials")
            elif pattern_type == 'rate_limit_error':
                suggestions.append("Implement rate limiting and backoff strategies")
            elif pattern_type == 'validation_error':
                suggestions.append("Add input validation and data sanitization steps")
        return suggestions


if __name__ == "__main__":
    # Quick test of the analyzer
    analyzer = WorkflowAnalyzer()
    print("Workflow Analyzer initialized successfully.")
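
    # Minimal demo: run the structural analysis on a made-up workflow. The node
    # names, types, and connection layout below are illustrative sample data,
    # not taken from a real N8N export.
    sample_workflow = {
        'id': 'demo-workflow',
        'nodes': [
            {'name': 'Webhook', 'type': 'n8n-nodes-base.webhook', 'parameters': {}},
            {'name': 'Fetch Data', 'type': 'n8n-nodes-base.httpRequest',
             'parameters': {'url': 'https://example.com/api', 'timeout': 10}},
        ],
        'connections': {
            'Webhook': {'main': [[{'node': 'Fetch Data', 'type': 'main', 'index': 0}]]}
        },
    }
    report = analyzer.analyze_workflow_structure(sample_workflow)
    print(f"Issues found: {len(report['issues'])}")
    for suggestion in report['suggestions']:
        print(f"  - {suggestion}")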