"""
Real-time Analytics Engine for Portal Performance Monitoring

This module provides real-time analysis of portal parsing performance,
selector health, and server status monitoring.

This version uses subprocess to call Django management commands instead of
direct Django imports to avoid initialization issues.
"""

from __future__ import annotations

import json
import logging
import os
import subprocess
import sys
import time
from concurrent.futures import ProcessPoolExecutor, as_completed
from datetime import datetime, timedelta
from functools import lru_cache
from pathlib import Path
from typing import Dict, List, Any, Optional


class RealtimeAnalytics:
    """Real-time analytics and performance monitoring facade.

    Fetches all data by invoking the project's ``realtime_data`` Django
    management command in a subprocess (avoiding in-process Django
    initialization issues) and caches the parsed responses for a short
    TTL to limit repeated subprocess launches.
    """

    def __init__(self):
        # Server-liveness bookkeeping.
        self.last_update_check = None
        self.last_page_count = None
        self.update_threshold_minutes = 15  # Consider server offline if no updates in 15 min

        # Find project root and manage.py
        self.project_root = self._find_project_root()
        self.manage_py = self.project_root / "manage.py"

        # Cache for reducing repeated queries: key -> (data, timestamp)
        self._cache: Dict[str, Any] = {}
        self._cache_ttl = 5  # seconds
        self._cache_max_size = 100  # Maximum cache entries

        # Process pool created on-demand, not persistent
        self._process_pool = None
        self._pool_idle_timeout = 30  # Cleanup pool after 30s of inactivity
        self._last_pool_use = None

    def _find_project_root(self) -> Path:
        """Locate the nearest ancestor directory that contains manage.py.

        Walks up to 6 parent directories starting from this file's
        directory.

        Returns:
            Path of the first ancestor containing ``manage.py``.

        Raises:
            RuntimeError: If ``manage.py`` is not found within the
                search depth.
        """
        p = Path(__file__).resolve().parent
        for _ in range(6):
            if (p / "manage.py").exists():
                return p
            p = p.parent
        raise RuntimeError("Could not find manage.py")

    def _call_django_command(self, action: str, domain: Optional[str] = None, **kwargs) -> Dict[str, Any]:
        """Call the ``realtime_data`` Django management command with caching.

        Args:
            action: Sub-action name passed to the management command.
            domain: Optional portal domain, forwarded as ``--domain``.
            **kwargs: Extra options forwarded as ``--key value`` pairs.

        Returns:
            The command's parsed JSON payload, or a dict with an
            ``'error'`` key on failure. Error results are intentionally
            not cached so the next call retries.
        """
        # Opportunistically release the worker pool if it has been idle.
        self._cleanup_idle_pool()

        # Cache key covers the action, the domain and every extra option.
        cache_key = f"{action}_{domain}_{json.dumps(kwargs, sort_keys=True)}"

        # Serve a fresh-enough cached response if one exists.
        if cache_key in self._cache:
            cached_data, cached_time = self._cache[cache_key]
            if time.time() - cached_time < self._cache_ttl:
                return cached_data

        try:
            cmd = [sys.executable, str(self.manage_py), "realtime_data", action]
            if domain:
                cmd.extend(["--domain", domain])

            # Add any additional keyword arguments as command line options
            for key, value in kwargs.items():
                cmd.extend([f"--{key}", str(value)])

            result = subprocess.run(
                cmd,
                cwd=str(self.project_root),
                capture_output=True,
                text=True,
                timeout=60,
                # Fix: use os.environ directly rather than reaching into
                # subprocess's internal re-export (subprocess.os.environ).
                env={**os.environ, 'PYTHONUNBUFFERED': '1'}
            )

            if result.returncode != 0:
                return {'error': f'Command failed: {result.stderr}'}

            data = json.loads(result.stdout)

            # Store in cache and update last use time
            self._cache[cache_key] = (data, time.time())
            self._last_pool_use = time.time()

            # Trim cache if needed
            self._trim_cache()

            return data
        except subprocess.TimeoutExpired:
            return {'error': 'Command timeout'}
        except json.JSONDecodeError as e:
            # ``result`` is always bound here: json.loads only runs after
            # subprocess.run has returned.
            return {'error': f'JSON parse error: {str(e)}', 'output': result.stdout}
        except Exception as e:
            return {'error': str(e)}

    def check_server_status(self) -> Dict[str, Any]:
        """
        Check if the server is actively updating data.

        Returns:
            dict: {
                'is_live': bool,
                'last_update': datetime or None,
                'minutes_since_update': int or None,
                'message': str
            }
        """
        return self._call_django_command('server-status')

    def get_overall_portal_performance(self) -> Dict[str, Any]:
        """
        Get real-time performance metrics for all portals.

        Returns:
            dict: {
                'portals': [...],
                'summary': {...}
            }
        """
        return self._call_django_command('overall-performance')

    def get_portal_detailed_analysis(self, domain: str) -> Dict[str, Any]:
        """
        Get detailed real-time analysis for a specific portal.

        Args:
            domain: The portal domain to analyze

        Returns:
            dict: {
                'domain': str,
                'performance_percentage': float,
                'selector_analysis': {...},
                'page_analysis': {...},
                'error_breakdown': {...},
                'recent_activity': {...}
            }
        """
        return self._call_django_command('portal-details', domain=domain)

    def get_live_processing_feed(self, limit: int = 5) -> Dict[str, Any]:
        """
        Get real-time feed of recently processed ads with their performance metrics.

        Args:
            limit: Number of recent ads to fetch (default 5)

        Returns:
            dict: {
                'ads': [
                    {
                        'id': int,
                        'url': str,
                        'portal': str,
                        'processed_at': str,
                        'fields_extracted': int,
                        'fields_total': int,
                        'completion_rate': float,
                        'has_description_scraper': bool,
                        'critical_fields_present': bool,
                        'processing_time_ms': int (if available)
                    },
                    ...
                ],
                'summary': {
                    'total_processed': int,
                    'avg_completion_rate': float,
                    'processing_rate_per_minute': float,
                    'active_portals': int
                }
            }
        """
        return self._call_django_command('live-feed', limit=str(limit))

    def _cleanup_idle_pool(self):
        """Shut down the process pool if it has been idle past the timeout."""
        if self._process_pool and self._last_pool_use:
            idle_time = time.time() - self._last_pool_use
            if idle_time > self._pool_idle_timeout:
                try:
                    self._process_pool.shutdown(wait=False)
                    self._process_pool = None
                except Exception as e:
                    # Fix: ``logger`` was an undefined name here (NameError).
                    logging.getLogger(__name__).error(f"Error cleaning up idle pool: {e}")

    def cleanup(self):
        """Cleanup resources, especially the process pool."""
        if self._process_pool:
            try:
                self._process_pool.shutdown(wait=False)
                self._process_pool = None
            except Exception as e:
                # Fix: ``logger`` was an undefined name here (NameError).
                logging.getLogger(__name__).error(f"Error during cleanup: {e}")

    def clear_cache(self):
        """Clear the internal cache."""
        self._cache.clear()

    def _trim_cache(self):
        """Evict the oldest ~20% of entries once the cache exceeds its cap."""
        if len(self._cache) > self._cache_max_size:
            # Remove oldest entries (first 20%), ordered by stored timestamp.
            items = sorted(self._cache.items(), key=lambda kv: kv[1][1])
            self._cache = dict(items[int(len(items) * 0.2):])


# Module-level holder for the process-wide singleton.
_analytics_instance = None


def get_analytics_instance() -> RealtimeAnalytics:
    """Return the shared RealtimeAnalytics instance, creating it lazily."""
    global _analytics_instance
    instance = _analytics_instance
    if instance is None:
        instance = RealtimeAnalytics()
        _analytics_instance = instance
    return instance
