tkb_timeshift/claude_n8n/tools/workflow_analyzer.py
#!/usr/bin/env python3
"""
Workflow Analyzer - Tools for analyzing N8N workflows and execution results
Provides debugging, error analysis, and performance insights
"""
import json
import re
from collections import defaultdict
from dataclasses import dataclass
from datetime import datetime
from typing import Dict, List


@dataclass
class AnalysisResult:
    """Result of workflow analysis"""

    workflow_id: str
    issues: List[Dict]
    suggestions: List[str]
    performance_metrics: Dict
    error_patterns: List[Dict]
    health_score: float


class WorkflowAnalyzer:
    """Analyzes N8N workflows for issues, performance, and optimization opportunities"""

    def __init__(self):
        self.common_issues = {
            'missing_error_handling': 'Node lacks error handling configuration',
            'hardcoded_credentials': 'Credentials are hardcoded instead of using the credential store',
            'inefficient_loops': 'Loop structure may cause performance issues',
            'missing_validation': 'Input validation is missing or insufficient',
            'timeout_issues': 'Request timeout settings may be too aggressive',
            'rate_limit_violations': 'API rate limits are not properly handled'
        }
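
    # Assumed input shape (an n8n workflow export; adapt if your export differs):
    #   {'id': '...',
    #    'nodes': [{'name': ..., 'type': ..., 'parameters': {...}}, ...],
    #    'connections': {'<source node name>': {'main': [[{'node': '<target>', ...}]]}}}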
    def analyze_workflow_structure(self, workflow: Dict) -> Dict:
        """Analyze workflow structure for common issues"""
        issues = []
        suggestions = []
        nodes = workflow.get('nodes', [])
        connections = workflow.get('connections', {})

        # Check for common structural issues
        issues.extend(self._check_error_handling(nodes))
        issues.extend(self._check_credential_usage(nodes))
        issues.extend(self._check_node_configurations(nodes))
        issues.extend(self._check_workflow_complexity(nodes, connections))

        # Generate suggestions based on the issues found
        suggestions.extend(self._generate_suggestions(issues))

        return {
            'issues': issues,
            'suggestions': suggestions,
            'node_count': len(nodes),
            # Count individual links rather than output slots
            'connection_count': sum(
                len(targets)
                for conns in connections.values()
                for targets in conns.get('main', [])
            ),
            'complexity_score': self._calculate_complexity_score(nodes, connections)
        }
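
    # Assumed shape of an execution record (id, status, startedAt/finishedAt
    # ISO timestamps, and a 'data' payload containing 'resultData'); adjust
    # the key names if your n8n version returns a different structure.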
    def analyze_execution_logs(self, execution_data: Dict) -> Dict:
        """Analyze execution logs for errors and performance issues"""
        execution_id = execution_data.get('id')
        status = execution_data.get('status')
        data = execution_data.get('data', {})

        analysis = {
            'execution_id': execution_id,
            'status': status,
            'errors': [],
            'warnings': [],
            'performance_issues': [],
            'node_timings': {},
            'total_duration': 0
        }

        if status == 'error':
            analysis['errors'] = self._extract_errors(data)

        # Analyze node performance
        if 'resultData' in data:
            analysis['node_timings'] = self._analyze_node_timings(data['resultData'])
            analysis['performance_issues'] = self._identify_performance_issues(analysis['node_timings'])

        # Calculate total execution time
        start_time = execution_data.get('startedAt')
        finish_time = execution_data.get('finishedAt')
        if start_time and finish_time:
            start_dt = datetime.fromisoformat(start_time.replace('Z', '+00:00'))
            finish_dt = datetime.fromisoformat(finish_time.replace('Z', '+00:00'))
            analysis['total_duration'] = (finish_dt - start_dt).total_seconds()

        return analysis

    def find_error_patterns(self, executions: List[Dict]) -> List[Dict]:
        """Identify recurring error patterns across multiple executions"""
        error_patterns = defaultdict(int)
        error_details = defaultdict(list)

        for execution in executions:
            if execution.get('status') == 'error':
                errors = self._extract_errors(execution.get('data', {}))
                for error in errors:
                    error_type = self._categorize_error(error)
                    error_patterns[error_type] += 1
                    error_details[error_type].append({
                        'execution_id': execution.get('id'),
                        'timestamp': execution.get('startedAt'),
                        'error': error
                    })

        patterns = []
        for pattern, count in error_patterns.items():
            patterns.append({
                'pattern': pattern,
                'frequency': count,
                'percentage': (count / len(executions)) * 100,
                'examples': error_details[pattern][:3]  # First 3 examples
            })

        return sorted(patterns, key=lambda x: x['frequency'], reverse=True)
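
    # Each returned entry looks like (illustrative values):
    #   {'pattern': 'timeout_error', 'frequency': 4, 'percentage': 40.0,
    #    'examples': [{'execution_id': ..., 'timestamp': ..., 'error': {...}}]}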

    def generate_health_report(self, workflow: Dict, executions: List[Dict]) -> AnalysisResult:
        """Generate a comprehensive health report for a workflow"""
        workflow_id = workflow.get('id')

        # Analyze workflow structure
        structure_analysis = self.analyze_workflow_structure(workflow)

        # Analyze the 10 most recent executions
        execution_analyses = [self.analyze_execution_logs(execution) for execution in executions[-10:]]
        error_patterns = self.find_error_patterns(executions)

        # Calculate performance metrics
        performance_metrics = self._calculate_performance_metrics(execution_analyses)

        # Calculate health score
        health_score = self._calculate_health_score(structure_analysis, execution_analyses, error_patterns)

        # Combine all issues and suggestions
        all_issues = structure_analysis['issues']
        all_suggestions = structure_analysis['suggestions']

        # Add execution-based suggestions
        if error_patterns:
            all_suggestions.extend(self._suggest_error_fixes(error_patterns))

        return AnalysisResult(
            workflow_id=workflow_id,
            issues=all_issues,
            suggestions=all_suggestions,
            performance_metrics=performance_metrics,
            error_patterns=error_patterns,
            health_score=health_score
        )
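
    # Typical call sequence (hypothetical variables):
    #   report = analyzer.generate_health_report(workflow, executions)
    #   print(report.health_score, report.suggestions)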

    def _check_error_handling(self, nodes: List[Dict]) -> List[Dict]:
        """Check for missing error handling in nodes"""
        issues = []
        for node in nodes:
            node_type = node.get('type', '')
            if node_type in ['n8n-nodes-base.httpRequest', 'n8n-nodes-base.webhook']:
                # Check if error handling is configured
                parameters = node.get('parameters', {})
                if not parameters.get('continueOnFail') and not parameters.get('errorHandling'):
                    issues.append({
                        'type': 'missing_error_handling',
                        'node': node.get('name'),
                        'severity': 'medium',
                        'description': f"Node '{node.get('name')}' lacks error handling configuration"
                    })
        return issues

    def _check_credential_usage(self, nodes: List[Dict]) -> List[Dict]:
        """Check for hardcoded credentials"""
        issues = []
        for node in nodes:
            parameters = node.get('parameters', {})
            param_str = json.dumps(parameters)

            # Look for potential hardcoded credentials (broad heuristics,
            # so false positives are possible)
            suspicious_patterns = [
                r'password.*["\'].*["\']',
                r'token.*["\'].*["\']',
                r'key.*["\'].*["\']',
                r'secret.*["\'].*["\']'
            ]
            for pattern in suspicious_patterns:
                if re.search(pattern, param_str, re.IGNORECASE):
                    issues.append({
                        'type': 'hardcoded_credentials',
                        'node': node.get('name'),
                        'severity': 'high',
                        'description': f"Node '{node.get('name')}' may contain hardcoded credentials"
                    })
                    break
        return issues

    def _check_node_configurations(self, nodes: List[Dict]) -> List[Dict]:
        """Check node configurations for common issues"""
        issues = []
        for node in nodes:
            node_type = node.get('type', '')
            parameters = node.get('parameters', {})

            # Check HTTP request timeouts
            if node_type == 'n8n-nodes-base.httpRequest':
                timeout = parameters.get('timeout', 300)  # Default: 5 minutes
                if timeout < 30:
                    issues.append({
                        'type': 'timeout_issues',
                        'node': node.get('name'),
                        'severity': 'low',
                        'description': f"HTTP timeout ({timeout}s) may be too aggressive"
                    })

            # Check for missing required parameters
            if not parameters:
                issues.append({
                    'type': 'missing_validation',
                    'node': node.get('name'),
                    'severity': 'medium',
                    'description': f"Node '{node.get('name')}' has no parameters configured"
                })
        return issues

    def _check_workflow_complexity(self, nodes: List[Dict], connections: Dict) -> List[Dict]:
        """Check workflow complexity and structure"""
        issues = []

        # Flag overly complex workflows (more than 20 nodes)
        if len(nodes) > 20:
            issues.append({
                'type': 'workflow_complexity',
                'severity': 'medium',
                'description': f"Workflow has {len(nodes)} nodes; consider breaking it into smaller workflows"
            })

        # Check for disconnected nodes
        connected_nodes = set()
        for source, targets in connections.items():
            connected_nodes.add(source)
            for target_list in targets.get('main', []):
                for target in target_list:
                    connected_nodes.add(target.get('node'))

        all_nodes = {node.get('name') for node in nodes}
        disconnected = all_nodes - connected_nodes
        if disconnected:
            issues.append({
                'type': 'disconnected_nodes',
                'severity': 'high',
                'description': f"Disconnected nodes found: {', '.join(disconnected)}"
            })
        return issues

    def _extract_errors(self, execution_data: Dict) -> List[Dict]:
        """Extract error information from execution data"""
        errors = []
        if 'resultData' in execution_data:
            result_data = execution_data['resultData']
            if 'error' in result_data:
                error_info = result_data['error']
                errors.append({
                    'message': error_info.get('message', ''),
                    'stack': error_info.get('stack', ''),
                    'type': error_info.get('name', 'Unknown'),
                    'node': error_info.get('node', 'Unknown')
                })
        return errors

    def _categorize_error(self, error: Dict) -> str:
        """Categorize an error by type"""
        message = error.get('message', '').lower()
        if 'timeout' in message:
            return 'timeout_error'
        elif 'connection' in message or 'network' in message:
            return 'connection_error'
        elif 'authentication' in message or 'unauthorized' in message:
            return 'auth_error'
        elif 'rate limit' in message or '429' in message:
            return 'rate_limit_error'
        elif 'validation' in message or 'invalid' in message:
            return 'validation_error'
        else:
            return 'generic_error'
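
    # Assumed per-node run data shape (adapt to your n8n version's actual
    # execution payload): runData maps each node name to a list of runs,
    # where a run carries 'startTime', 'executionTime' (ms), and 'data'.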
    def _analyze_node_timings(self, result_data: Dict) -> Dict:
        """Analyze timing data for each node"""
        timings = {}

        # Extract timing information from the result data; this may need to
        # be adapted to the actual N8N execution data structure
        run_data = result_data.get('runData', {})
        for node_name, node_data in run_data.items():
            if isinstance(node_data, list) and node_data:
                node_execution = node_data[0]
                start_time = node_execution.get('startTime')
                execution_time = node_execution.get('executionTime')
                if start_time and execution_time:
                    timings[node_name] = {
                        'start_time': start_time,
                        'execution_time': execution_time,
                        'data_count': len(node_execution.get('data', {}).get('main', []))
                    }
        return timings

    def _identify_performance_issues(self, node_timings: Dict) -> List[Dict]:
        """Identify performance issues from node timing data"""
        issues = []
        for node_name, timing in node_timings.items():
            execution_time = timing.get('execution_time', 0)

            # Flag nodes taking longer than 30 seconds
            if execution_time > 30000:  # milliseconds
                issues.append({
                    'type': 'slow_node',
                    'node': node_name,
                    'execution_time': execution_time,
                    'description': f"Node '{node_name}' took {execution_time / 1000:.2f}s to execute"
                })
        return issues

    def _calculate_performance_metrics(self, execution_analyses: List[Dict]) -> Dict:
        """Calculate performance metrics from execution analyses"""
        if not execution_analyses:
            return {}

        durations = [analysis['total_duration'] for analysis in execution_analyses if analysis['total_duration'] > 0]
        error_count = len([analysis for analysis in execution_analyses if analysis['status'] == 'error'])

        return {
            'avg_duration': sum(durations) / len(durations) if durations else 0,
            'max_duration': max(durations) if durations else 0,
            'min_duration': min(durations) if durations else 0,
            'error_rate': (error_count / len(execution_analyses)) * 100,
            'total_executions': len(execution_analyses)
        }

    def _calculate_complexity_score(self, nodes: List[Dict], connections: Dict) -> float:
        """Calculate workflow complexity score (0-100)"""
        node_count = len(nodes)
        connection_count = sum(
            len(targets)
            for conns in connections.values()
            for targets in conns.get('main', [])
        )

        # Simple complexity calculation: nodes are weighted double
        complexity = (node_count * 2) + connection_count

        # Normalize to a 0-100 scale
        return min(complexity / 2, 100)
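
    # Worked example: 10 nodes and 12 links give (10 * 2) + 12 = 32,
    # which normalizes to a complexity score of 16.0.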

    def _calculate_health_score(self, structure_analysis: Dict, execution_analyses: List[Dict], error_patterns: List[Dict]) -> float:
        """Calculate overall workflow health score (0-100)"""
        score = 100.0

        # Deduct points for structural issues
        high_severity_issues = len([issue for issue in structure_analysis['issues'] if issue.get('severity') == 'high'])
        medium_severity_issues = len([issue for issue in structure_analysis['issues'] if issue.get('severity') == 'medium'])
        score -= high_severity_issues * 20
        score -= medium_severity_issues * 10

        # Deduct points for execution errors
        if execution_analyses:
            error_rate = len([analysis for analysis in execution_analyses if analysis['status'] == 'error']) / len(execution_analyses)
            score -= error_rate * 50

        # Deduct points for recurring error patterns
        for pattern in error_patterns:
            if pattern['frequency'] > 1:
                score -= min(pattern['frequency'] * 5, 30)

        return max(score, 0.0)
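
    # Worked example: one high-severity issue (-20) plus a 30% execution
    # error rate (-0.3 * 50 = -15) yields a health score of 65.0.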

    def _generate_suggestions(self, issues: List[Dict]) -> List[str]:
        """Generate improvement suggestions based on issues"""
        suggestions = []
        for issue in issues:
            issue_type = issue.get('type')
            if issue_type == 'missing_error_handling':
                suggestions.append("Add error handling to HTTP and webhook nodes using the 'Continue on Fail' option")
            elif issue_type == 'hardcoded_credentials':
                suggestions.append("Move credentials to the N8N credential store for better security")
            elif issue_type == 'timeout_issues':
                suggestions.append("Review and adjust timeout settings based on expected response times")
            elif issue_type == 'workflow_complexity':
                suggestions.append("Consider breaking the complex workflow into smaller, manageable sub-workflows")
            elif issue_type == 'disconnected_nodes':
                suggestions.append("Remove unused nodes or connect them to the workflow")
        return list(dict.fromkeys(suggestions))  # De-duplicate while preserving order

    def _suggest_error_fixes(self, error_patterns: List[Dict]) -> List[str]:
        """Suggest fixes for common error patterns"""
        suggestions = []
        for pattern in error_patterns:
            pattern_type = pattern['pattern']
            if pattern_type == 'timeout_error':
                suggestions.append("Increase timeout settings or implement retry logic for timeout-prone operations")
            elif pattern_type == 'connection_error':
                suggestions.append("Add connection retry logic and check network connectivity")
            elif pattern_type == 'auth_error':
                suggestions.append("Verify and refresh authentication credentials")
            elif pattern_type == 'rate_limit_error':
                suggestions.append("Implement rate limiting and backoff strategies")
            elif pattern_type == 'validation_error':
                suggestions.append("Add input validation and data sanitization steps")
        return suggestions


if __name__ == "__main__":
    # Quick smoke test of the analyzer
    analyzer = WorkflowAnalyzer()
    print("Workflow Analyzer initialized successfully.")