"""
Polish Real Estate Domain Knowledge & Mappings
===============================================
Dynamically loads vocabulary and patterns from JSON configuration.

This module provides a Python interface to the JSON-based configuration system.
All vocabulary updates should be made in: config/real_estate_mappings.json

Version: 3.0 (JSON-based)
Last Updated: 2024-12-03
"""

import json
import os
from pathlib import Path
from typing import Dict, List, Set, Any, Optional
import logging


# Module-level logger, named after this module's import path.
logger = logging.getLogger(__name__)


class PolishRealEstateMappings:
    """
    Centralized repository of all Polish real estate domain knowledge.

    The vocabulary lives in a JSON file. This class parses it once and
    flattens the nested sections into plain Python lookups (dicts, lists,
    sets) exposed as instance attributes.

    Design principles:
    - JSON-based configuration (reloadable via ``reload_configuration``
      without code changes)
    - Missing or non-object sections degrade to empty lookups; missing
      sections are reported with a warning
    - A missing or malformed configuration file raises; there is no embedded
      fallback vocabulary
    - Lookups are written only by ``__init__`` / ``reload_configuration``;
      a reload rebinds them one attribute at a time on the live instance
    - Backward compatible with existing extraction code

    Configuration file: ../config/real_estate_mappings.json
    """

    # Top-level sections the index builders read; absent ones only warn.
    _REQUIRED_SECTIONS = (
        'temporal', 'geographic', 'features',
        'property_types', 'technical', 'patterns',
    )

    # Bounding box (degrees) used for any bound the JSON does not provide.
    _DEFAULT_LATITUDE = {'min': 49.0, 'max': 55.0}
    _DEFAULT_LONGITUDE = {'min': 14.0, 'max': 25.0}

    def __init__(self, config_path: Optional[str] = None):
        """
        Initialize mappings from JSON configuration.

        Args:
            config_path: Path to JSON config file. If None (or empty),
                auto-detects from module structure.

        Raises:
            FileNotFoundError: If the config file cannot be found.
            json.JSONDecodeError: If the config file is not valid JSON.
            ValueError: If the top-level JSON value is not an object.
        """
        self._config_path = config_path or self._auto_detect_config_path()
        self._config: Dict[str, Any] = {}
        self._load_configuration()
        self._build_indexes()

    # ========================================================================
    # CONFIGURATION LOADING
    # ========================================================================

    @staticmethod
    def _auto_detect_config_path() -> str:
        """
        Auto-detect config file path based on module location.

        Returns:
            Path of the first existing candidate, as a string.

        Raises:
            FileNotFoundError: If none of the candidate locations exists.
        """
        current_file = Path(__file__).resolve()
        # From: .../description_extrator/core/mappings.py
        # To:   .../description_extrator/config/real_estate_mappings.json
        config_file = current_file.parent.parent / "config" / "real_estate_mappings.json"
        alternatives = [
            current_file.parent / "config" / "real_estate_mappings.json",
            current_file.parent.parent.parent / "config" / "real_estate_mappings.json",
        ]

        # The sibling "config" directory wins; alternatives are tried in order.
        for candidate in [config_file, *alternatives]:
            if candidate.exists():
                return str(candidate)

        raise FileNotFoundError(
            f"Configuration file not found at: {config_file}\n"
            f"Also checked: {alternatives}\n"
            f"Please ensure real_estate_mappings.json exists in config/ directory."
        )

    @staticmethod
    def _dict_at(container: Dict[str, Any], key: str) -> Dict[str, Any]:
        """Return ``container[key]`` if it is a JSON object, else an empty dict."""
        value = container.get(key)
        return value if isinstance(value, dict) else {}

    @staticmethod
    def _build_variant_index(data: Dict[str, Any]) -> Dict[str, str]:
        """
        Flatten ``{name: {"canonical": c, "patterns": [...]}}`` into
        ``{pattern: c}``. Entries lacking either key are skipped.
        """
        index: Dict[str, str] = {}
        for entry in data.values():
            if isinstance(entry, dict) and 'patterns' in entry and 'canonical' in entry:
                canonical = entry['canonical']
                for pattern in entry['patterns']:
                    index[pattern] = canonical
        return index

    def _load_configuration(self) -> None:
        """
        Load and validate JSON configuration.

        ``self._config`` is replaced only after the file has been parsed and
        its top-level type checked, so a failed load leaves the previous
        configuration untouched.

        Raises:
            json.JSONDecodeError: If the file is not valid JSON.
            ValueError: If the top-level JSON value is not an object.
            Exception: Any other error from opening/reading the file is
                logged and re-raised unchanged.
        """
        try:
            with open(self._config_path, 'r', encoding='utf-8') as f:
                loaded = json.load(f)
        except json.JSONDecodeError as e:
            logger.error(f"✗ Malformed JSON in {self._config_path}: {e}")
            raise
        except Exception as e:
            logger.error(f"✗ Failed to load configuration: {e}")
            raise

        if not isinstance(loaded, dict):
            message = (
                f"Top-level JSON value in {self._config_path} must be an object, "
                f"got {type(loaded).__name__}"
            )
            logger.error(f"✗ Failed to load configuration: {message}")
            raise ValueError(message)

        # Validate required top-level keys
        missing = [s for s in self._REQUIRED_SECTIONS if s not in loaded]
        if missing:
            logger.warning(f"Missing configuration sections: {missing}. Using empty defaults.")

        self._config = loaded

        logger.info(f"✓ Loaded real estate mappings from: {self._config_path}")
        version = self._dict_at(loaded, 'metadata').get('version', 'unknown')
        logger.info(f"  Configuration version: {version}")

    def _build_indexes(self) -> None:
        """
        Build efficient lookup indexes from JSON configuration.
        Transforms nested JSON into flat Python structures for fast access.
        """
        self._init_temporal_mappings()
        self._init_geographic_mappings()
        self._init_property_features()
        self._init_property_types()
        self._init_technical_mappings()
        self._init_pattern_keywords()

    # ========================================================================
    # INDEX BUILDERS (Transform JSON → Python structures)
    # ========================================================================

    def _init_temporal_mappings(self):
        """Load temporal data from JSON (``months_pl``, ``availability_keywords``)."""
        temporal = self._dict_at(self._config, 'temporal')

        # Merge all month formats into single lookup dict. Built locally and
        # bound once so readers never observe a half-filled dict on reload.
        months_data = self._dict_at(temporal, 'months')
        months: Dict[str, int] = {}
        for category in ('full_nominative', 'genitive', 'abbreviations'):
            months.update(months_data.get(category, {}))
        self.months_pl: Dict[str, int] = months

        self.availability_keywords: List[str] = temporal.get('availability_keywords', [])

    def _init_geographic_mappings(self):
        """Load geographic data from JSON (cities, street types, coordinate bounds)."""
        geographic = self._dict_at(self._config, 'geographic')

        # Merge all city categories into single set
        cities_data = self._dict_at(geographic, 'cities')
        cities: Set[str] = set()
        for category in ('major', 'medium', 'smaller'):
            cities.update(cities_data.get(category, []))
        self.polish_cities: Set[str] = cities

        # Street types: full forms first, then abbreviations
        street_data = self._dict_at(geographic, 'street_types')
        self.street_types: List[str] = (
            street_data.get('full', []) +
            street_data.get('abbreviated', [])
        )

        # Coordinate bounds: each missing min/max falls back to its default
        # individually instead of raising KeyError on a partial entry.
        bounds = self._dict_at(geographic, 'coordinate_bounds')
        lat_bounds = {**self._DEFAULT_LATITUDE, **self._dict_at(bounds, 'latitude')}
        lon_bounds = {**self._DEFAULT_LONGITUDE, **self._dict_at(bounds, 'longitude')}
        self.coordinate_bounds: Dict[str, tuple] = {
            'lat': (lat_bounds['min'], lat_bounds['max']),
            'lon': (lon_bounds['min'], lon_bounds['max'])
        }

    def _init_property_features(self):
        """Load property features from JSON (boolean, media and security keywords)."""
        features = self._dict_at(self._config, 'features')

        # Boolean keywords (direct reference to the JSON structure)
        self.boolean_keywords: Dict[str, List[str]] = features.get('boolean_keywords', {})

        # Media types - build reverse mapping (variant → canonical)
        media_data = self._dict_at(features, 'media_types')
        self.media_keywords: Dict[str, str] = {
            variant: canonical
            for canonical, variants in media_data.items()
            for variant in variants
        }

        # Security features
        self.security_keywords: List[str] = features.get('security_features', [])

    def _init_property_types(self):
        """Load property type mappings from JSON as variant → canonical dicts."""
        property_types = self._dict_at(self._config, 'property_types')

        def index_of(section: str) -> Dict[str, str]:
            return self._build_variant_index(self._dict_at(property_types, section))

        self.condition_map: Dict[str, str] = index_of('condition')
        self.building_type_map: Dict[str, str] = index_of('building_type')
        self.market_type_map: Dict[str, str] = index_of('market_type')
        self.ownership_map: Dict[str, str] = index_of('ownership_form')

    def _init_technical_mappings(self):
        """Load technical specifications from JSON (heating, windows, materials)."""
        technical = self._dict_at(self._config, 'technical')

        # Heating types: variant → canonical, plus the union of context keywords
        heating_data = self._dict_at(technical, 'heating_types')
        self.heating_map: Dict[str, str] = self._build_variant_index(heating_data)

        context_keywords: List[str] = []
        for entry in heating_data.values():
            # Same eligibility rule as the variant index: only complete entries count.
            if isinstance(entry, dict) and 'patterns' in entry and 'canonical' in entry:
                context_keywords.extend(entry.get('context_keywords', []))
        # dict.fromkeys de-duplicates while keeping first-seen order, so the
        # list is identical from run to run (a set would not guarantee that).
        self.heating_context_keywords: List[str] = list(dict.fromkeys(context_keywords))

        # Window types
        self.window_map: Dict[str, str] = self._build_variant_index(
            self._dict_at(technical, 'window_types')
        )

        # Building materials (direct reference)
        self.building_materials: List[str] = technical.get('building_materials', [])

    def _init_pattern_keywords(self):
        """Load pattern matching keywords from JSON (negatives, conversions, ranges)."""
        patterns = self._dict_at(self._config, 'patterns')

        # Negative indicators
        self.negative_keywords: List[str] = patterns.get('negative_indicators', [])

        # Area conversions (direct reference)
        self.area_conversions: Dict[str, float] = patterns.get('area_conversions', {})

        # Value ranges - convert from {min, max, unit} to (min, max) tuple;
        # entries without both bounds are skipped.
        self.value_ranges: Dict[str, tuple] = {
            field: (data['min'], data['max'])
            for field, data in self._dict_at(patterns, 'value_ranges').items()
            if isinstance(data, dict) and 'min' in data and 'max' in data
        }

    # ========================================================================
    # EXTERNAL API MAPPINGS
    # ========================================================================

    def heating_category_to_polish(self, category: str) -> str:
        """
        Convert external API heating categories to Polish canonical forms.

        Returns:
            The mapped value, or ``category`` unchanged when no mapping exists.
        """
        external_mappings = self._dict_at(self._config, 'external_api_mappings')
        heating_categories = self._dict_at(external_mappings, 'heating_categories')
        return heating_categories.get(category, category)

    # ========================================================================
    # UTILITY METHODS
    # ========================================================================

    def is_valid_coordinate(self, lat: float, lon: float) -> bool:
        """Return True if (lat, lon) lies within the configured bounds (inclusive)."""
        lat_min, lat_max = self.coordinate_bounds['lat']
        lon_min, lon_max = self.coordinate_bounds['lon']
        return lat_min <= lat <= lat_max and lon_min <= lon <= lon_max

    def is_valid_value(self, field: str, value: float) -> bool:
        """
        Check if extracted value is within the configured range (inclusive).

        Fields without a configured range are always considered valid.
        """
        if field not in self.value_ranges:
            return True
        min_val, max_val = self.value_ranges[field]
        return min_val <= value <= max_val

    def get_all_feature_keys(self) -> List[str]:
        """Return list of all extractable boolean features."""
        return list(self.boolean_keywords)

    def reload_configuration(self) -> None:
        """
        Reload configuration from JSON file.
        Useful for hot-reloading after config changes without restarting.

        If loading fails the exception propagates and the previously loaded
        configuration and indexes are kept.
        """
        logger.info("Reloading configuration...")
        self._load_configuration()
        self._build_indexes()
        logger.info("✓ Configuration reloaded successfully")

    def get_config_info(self) -> Dict[str, Any]:
        """
        Get metadata about current configuration.

        Returns:
            Dict with ``config_path``, ``version``, ``last_updated``,
            ``maintainer`` (each ``'unknown'`` when absent from the JSON
            ``metadata`` section) and ``stats`` with the sizes of the main
            lookup tables.
        """
        metadata = self._dict_at(self._config, 'metadata')
        return {
            'config_path': self._config_path,
            'version': metadata.get('version', 'unknown'),
            'last_updated': metadata.get('last_updated', 'unknown'),
            'maintainer': metadata.get('maintainer', 'unknown'),
            'stats': {
                'cities': len(self.polish_cities),
                'boolean_features': len(self.boolean_keywords),
                'conditions': len(self.condition_map),
                'building_types': len(self.building_type_map),
                'heating_types': len(self.heating_map),
                'window_types': len(self.window_map),
            }
        }


# ============================================================================
# SINGLETON INSTANCE
# ============================================================================

# Global singleton - created by the first get_mappings() call (this module
# makes that call itself at import time to populate MAPPINGS).
_MAPPINGS_INSTANCE: Optional[PolishRealEstateMappings] = None


def get_mappings(config_path: Optional[str] = None, force_reload: bool = False) -> PolishRealEstateMappings:
    """
    Get singleton instance of mappings.

    Args:
        config_path: Optional custom config path. Used when the singleton is
            first created and whenever ``force_reload`` is True; otherwise
            ignored because the existing instance is returned as-is.
        force_reload: Build a fresh instance from JSON even if one is already
            loaded. Without ``config_path`` the file of the current instance
            is re-read, so a previously supplied custom path is preserved.

    Returns:
        PolishRealEstateMappings instance

    Raises:
        Whatever ``PolishRealEstateMappings(...)`` raises; in that case the
        previously stored singleton (if any) is left in place.
    """
    global _MAPPINGS_INSTANCE

    if _MAPPINGS_INSTANCE is None:
        _MAPPINGS_INSTANCE = PolishRealEstateMappings(config_path)
    elif force_reload:
        # Without an explicit path, reload the file the current instance was
        # built from instead of silently falling back to auto-detection.
        _MAPPINGS_INSTANCE = PolishRealEstateMappings(
            config_path or _MAPPINGS_INSTANCE._config_path
        )

    return _MAPPINGS_INSTANCE


# Backward compatibility - default singleton instance.
# NOTE: this call runs at import time, so importing the module reads the JSON
# configuration and propagates any error raised while locating or parsing it.
# The name is bound once here; it is not rebound if get_mappings() later
# replaces the singleton (e.g. with force_reload=True).
MAPPINGS = get_mappings()


# ============================================================================
# COMMAND LINE INTERFACE
# ============================================================================

if __name__ == "__main__":
    # CLI for validation and inspection of mappings: prints a summary of the
    # loaded configuration and exits with status 0, or prints the error and
    # exits with status 1.
    import sys

    rule = "=" * 70

    print("Polish Real Estate Mappings - JSON-based Configuration")
    print(rule)

    try:
        mappings = get_mappings(force_reload=True)
        info = mappings.get_config_info()

        print(f"Configuration file: {info['config_path']}")
        print(f"Version: {info['version']}")
        print(f"Last updated: {info['last_updated']}")
        print(f"Maintainer: {info['maintainer']}")
        print(rule)

        print("\n📊 Statistics:")
        stats = info['stats']
        for key in stats:
            value = stats[key]
            print(f"  • {key.replace('_', ' ').title()}: {value}")

        print("\n🏙️  Sample cities (first 10):")
        sample_cities = list(mappings.polish_cities)[:10]
        for city in sample_cities:
            print(f"  • {city}")

        print("\n🏠 Boolean features:")
        for feature in mappings.get_all_feature_keys():
            patterns_for_feature = mappings.boolean_keywords[feature]
            count = len(patterns_for_feature)
            print(f"  • {feature}: {count} patterns")

        print("\n✓ Mappings validated successfully")
    except Exception as e:
        print(f"\n✗ Error: {e}")
        sys.exit(1)
    else:
        # Reached only when every step above succeeded.
        sys.exit(0)