
    wi>                        U d Z ddlZddlZddlmZ ddlmZmZmZm	Z	m
Z
 ddlZ ej                  e      Z G d d      Zdae
e   ed<   d%de
e   d	ed
efdZ e       Zedk(  rG	 ddlZ ed        ed       	  ed      Zej3                         Z eded            eded            eded            eded            ed        ed       ed   j7                         D ]1  \  ZZ edej=                  dd      j?                          de        3  ed        e ejB                        dd D ]  Z" ede"          ed        ejG                         D ]&  Z$ e%ejL                  e$         Z' ede$ de' d!       (  ed"        ejP                  d       yy# e)$ r'Z* ed#e*         ejP                  d$       Y dZ*[*ydZ*[*ww xY w)&as  
Polish Real Estate Domain Knowledge & Mappings
===============================================
Dynamically loads vocabulary and patterns from JSON configuration.

This module provides a Python interface to the JSON-based configuration system.
All vocabulary updates should be made in: config/real_estate_mappings.json

Version: 3.0 (JSON-based)
Last Updated: 2024-12-03
    N)Path)DictListSetAnyOptionalc                       e Zd ZdZddee   fdZedefd       ZddZ	ddZ
d	 Zd
 Zd Zd Zd Zd ZdedefdZdededefdZdededefdZdee   fdZddZdeeef   fdZy)PolishRealEstateMappingsa  
    Centralized repository of all Polish real estate domain knowledge.
    
    Design principles:
    - JSON-based configuration (hot-reloadable without code changes)
    - Thread-safe after initialization
    - Automatic fallback to embedded defaults if JSON missing
    - Validation and error handling for malformed JSON
    - Backward compatible with existing extraction code
    
    Configuration file: ../config/real_estate_mappings.json
    Nconfig_pathc                     |xs | j                         | _        i | _        | j                          | j	                          y)a^  
        Initialize mappings from JSON configuration.
        
        Args:
            config_path: Path to JSON config file. If None, auto-detects from module structure.
        
        Raises:
            FileNotFoundError: If config file not found and no fallback available
            json.JSONDecodeError: If config file is malformed
        N)_auto_detect_config_path_config_path_config_load_configuration_build_indexes)selfr   s     I/var/www/extractly/manual_agregator/description_extrator/core/mappings.py__init__z!PolishRealEstateMappings.__init__%   s9     (J4+H+H+J')  "    returnc                     t        t              j                         } | j                  j                  dz  }|dz  }|j	                         sn| j                  dz  dz  | j                  j                  j                  dz  dz  g}|D ]  }|j	                         st        |      c S  t        d| d| d      t        |      S )z6Auto-detect config file path based on module location.configzreal_estate_mappings.jsonz!Configuration file not found at: z
Also checked: zE
Please ensure real_estate_mappings.json exists in config/ directory.)r   __file__resolveparentexistsstrFileNotFoundError)current_file
config_dirconfig_filealternativesalts        r   r   z1PolishRealEstateMappings._auto_detect_config_path9   s     H~--/ "((//(:
 #>>!!# ##h.1LL##**11H<?ZZL $::<s8O $ $3K= A!!- /WX  ;r   c                    	 t        | j                  dd      5 }t        j                  |      | _        ddd       g d}|D cg c]  }|| j                  vs| }}|rt
        j                  d| d       t
        j                  d| j                          | j                  j                  d	i       j                  d
d      }t
        j                  d|        y# 1 sw Y   xY wc c}w # t        j                  $ r+}t
        j                  d| j                   d|         d}~wt        $ r}t
        j                  d|         d}~ww xY w)z%Load and validate JSON configuration.rzutf-8)encodingN)temporal
geographicfeaturesproperty_types	technicalpatternsz Missing configuration sections: z. Using empty defaults.u&   ✓ Loaded real estate mappings from: metadataversionunknownz  Configuration version: u   ✗ Malformed JSON in : u"   ✗ Failed to load configuration: )openr   jsonloadr   loggerwarninginfogetJSONDecodeErrorerror	Exception)r   frequired_sectionssmissingr.   es          r   r   z,PolishRealEstateMappings._load_configurationT   s1   	d''w?1#yy| @ !r"3M"3Qq7Lq"3GM!A'JabcKK@ARAR@STUll&&z26::9iPGKK3G9=> @?
 N ## 	LL1$2C2C1DBqcJK 	LL=aSAB	sL   C2 C!C2 C-C-BC2 !C*&C2 2E&D++E7EEc                     | j                          | j                          | j                          | j                          | j	                          | j                          y)z
        Build efficient lookup indexes from JSON configuration.
        Transforms nested JSON into flat Python structures for fast access.
        N)_init_temporal_mappings_init_geographic_mappings_init_property_features_init_property_types_init_technical_mappings_init_pattern_keywordsr   s    r   r   z'PolishRealEstateMappings._build_indexesk   sL     	$$&&&($$&!!#%%'##%r   c                     | j                   j                  di       }i | _        |j                  di       }dD ]-  }| j                  j                  |j                  |i              / |j                  dg       | _        y)zLoad temporal data from JSON.r'   months)full_nominativegenitiveabbreviationsavailability_keywordsN)r   r7   	months_plupdaterM   )r   r'   months_datacategorys       r   rA   z0PolishRealEstateMappings._init_temporal_mappings   sp    <<##J3 *,ll8R0HHNN!!+//(B"?@ I 19=TVX0Y"r   c                    | j                   j                  di       }|j                  di       }g }dD ]#  }|j                  |j                  |g              % t        |      | _        |j                  di       }|j                  dg       |j                  dg       z   | _        |j                  di       }|j                  dd	d
d      }|j                  dddd      }|d   |d   f|d   |d   fd| _        y)zLoad geographic data from JSON.r(   cities)majormediumsmallerstreet_typesfullabbreviatedcoordinate_boundslatitudeg     H@g     K@)minmax	longitudeg      ,@g      9@r\   r]   )latlonN)r   r7   extendsetpolish_citiesrW   rZ   )	r   r(   cities_data
all_citiesrQ   street_databounds
lat_bounds
lon_boundss	            r   rB   z2PolishRealEstateMappings._init_geographic_mappings   s   \\%%lB7
 !nnXr2
6Hkooh;< 7'*: !nn^R8OOFB'OOM2./ 	  3R8ZZ
D,FG
ZZT$-GH
u%z%'89u%z%'894
r   c                 &   | j                   j                  di       }|j                  di       | _        |j                  di       }i | _        |j	                         D ]  \  }}|D ]  }|| j                  |<     |j                  dg       | _        y)z!Load property features from JSON.r)   boolean_keywordsmedia_typessecurity_featuresN)r   r7   rk   media_keywordsitemssecurity_keywords)r   r)   
media_data	canonicalvariantsvariants         r   rC   z0PolishRealEstateMappings._init_property_features   s    <<##J3 7?llCUWY6Z \\-4
.0#-#3#3#5Ix#/8##G, $ $6
 -5LL9Lb,Qr   c                 x   | j                   j                  di       }dt        t        t        f   dt        t        t        f   fd} ||j                  di             | _         ||j                  di             | _         ||j                  di             | _         ||j                  di             | _        y	)
z&Load property type mappings from JSON.r*   datar   c                     i }| j                         D ]4  \  }}t        |t              sd|v sd|v s!|d   }|d   D ]  }|||<   	 6 |S )Nr,   rr   )ro   
isinstancedict)rv   resultkeyvaluerr   patterns         r   build_mappingzDPolishRealEstateMappings._init_property_types.<locals>.build_mapping   s]    F"jjl
UeT*zU/B{V[G[ %k 2I#(#4*3w $5 +
 Mr   	conditionbuilding_typemarket_typeownership_formN)	r   r7   r   r   r   condition_mapbuilding_type_mapmarket_type_mapownership_map)r   r*   r~   s      r   rD   z-PolishRealEstateMappings._init_property_types   s    ))*:B?	S#X 	4S> 	 .;>;M;Mk[];^-_ 2?~?Q?QRace?f1g 0=^=O=OP]_a=b/c .;>;M;MN^`b;c-dr   c                    | j                   j                  di       }dt        t        t        f   dt
        fd}|j                  di       } ||      \  | _        | _        |j                  di       }i | _        |j                         D ]>  \  }}t        |t              sd|v sd|v s!|d   }|d   D ]  }|| j                  |<    @ |j                  d	g       | _        y
)z(Load technical specifications from JSON.r+   rv   r   c                     i }g }| j                         D ]M  \  }}t        |t              sd|v sd|v s!|d   }|d   D ]  }|||<   	 d|v s:|j                  |d          O |t	        t        |            fS )Nr,   rr   context_keywords)ro   rx   ry   ra   listrb   )rv   mappingall_context_keywordsr{   r|   rr   r}   s          r   build_heating_mappingzPPolishRealEstateMappings._init_technical_mappings.<locals>.build_heating_mapping   s    G#% "jjl
UeT*zU/B{V[G[ %k 2I#(#4+4( $5 *U2,33E:L4MN + D%9!:;;;r   heating_typeswindow_typesr,   rr   building_materialsN)r   r7   r   r   r   tupleheating_mapheating_context_keywords
window_mapro   rx   ry   r   )	r   r+   r   heating_datawindow_datar{   r|   rr   r}   s	            r   rE   z1PolishRealEstateMappings._init_technical_mappings   s    LL$$["5		<S#X 	<5 	< !}}_b9:OP\:]7$7  mmNB7*,%++-JC%&:+>;RWCW!+.	$Z0G/8DOOG,  1 . .7]];OQS-Tr   c                 ^   | j                   j                  di       }|j                  dg       | _        |j                  di       | _        |j                  di       }i | _        |j                         D ]7  \  }}t        |t              sd|v sd|v s!|d   |d   f| j                  |<   9 y)z)Load pattern matching keywords from JSON.r,   negative_indicatorsarea_conversionsvalue_rangesr\   r]   N)r   r7   negative_keywordsr   r   ro   rx   ry   )r   r,   value_ranges_datafieldrv   s        r   rF   z/PolishRealEstateMappings._init_pattern_keywords   s    <<##J3 -5LL9NPR,S 3;,,?QSU2V %LL<.0,224KE4$%%4-ETM,0Ke+E!!%( 5r   rQ   c                     | j                   j                  di       }|j                  di       }|j                  ||      S )zBConvert external API heating categories to Polish canonical forms.external_api_mappingsheating_categories)r   r7   )r   rQ   external_mappingsr   s       r   heating_category_to_polishz3PolishRealEstateMappings.heating_category_to_polish  sA     LL,,-DbI.223GL!%%h99r   r_   r`   c                     | j                   d   \  }}| j                   d   \  }}||cxk  xr |k  nc xr ||cxk  xr |k  S c S )z1Validate if coordinates are within Poland bounds.r_   r`   )rZ   )r   r_   r`   lat_minlat_maxlon_minlon_maxs          r   is_valid_coordinatez,PolishRealEstateMappings.is_valid_coordinate  sQ    11%811%8#((FW-Fw-FF-FFr   r   r|   c                 d    || j                   vry| j                   |   \  }}||cxk  xr |k  S c S )z4Check if extracted value is within reasonable range.T)r   )r   r   r|   min_valmax_vals        r   is_valid_valuez'PolishRealEstateMappings.is_valid_value  s>    ))),,U3%*7****r   c                 H    t        | j                  j                               S )z0Return list of all extractable boolean features.)r   rk   keysrG   s    r   get_all_feature_keysz-PolishRealEstateMappings.get_all_feature_keys"  s    D))..011r   c                     t         j                  d       | j                          | j                          t         j                  d       y)z
        Reload configuration from JSON file.
        Useful for hot-reloading after config changes without restarting.
        zReloading configuration...u'   ✓ Configuration reloaded successfullyN)r4   r6   r   r   rG   s    r   reload_configurationz-PolishRealEstateMappings.reload_configuration&  s6    
 	01  "=>r   c                    | j                   j                  di       }| j                  |j                  dd      |j                  dd      |j                  dd      t        | j                        t        | j
                        t        | j                        t        | j                        t        | j                        t        | j                        ddS )z)Get metadata about current configuration.r-   r.   r/   last_updated
maintainer)rS   boolean_features
conditionsbuilding_typesr   r   )r   r.   r   r   stats)
r   r7   r   lenrc   rk   r   r   r   r   )r   r-   s     r   get_config_infoz(PolishRealEstateMappings.get_config_info0  s    <<##J3,,||Iy9$LLC",,|Y?d001$'(=(=$>!$"4"45"%d&<&<"=!$T%5%5!6 #DOO 4
 	
r   )N)r   N)__name__
__module____qualname____doc__r   r   r   staticmethodr   r   r   rA   rB   rC   rD   rE   rF   r   floatboolr   r   r   r   r   r   r   r    r   r   r
   r
      s    HSM (  c    4.&,
Z
6R"e4!UFF*:3 :3 :Gu G5 GT G+C + +$ +2d3i 2?
c3h 
r   r
   _MAPPINGS_INSTANCEr   force_reloadr   c                 4    t         |rt        |       a t         S )a  
    Get singleton instance of mappings.
    
    Args:
        config_path: Optional custom config path (only used on first call)
        force_reload: Force reload from JSON even if already loaded
    
    Returns:
        PolishRealEstateMappings instance
    )r   r
   )r   r   s     r   get_mappingsr   K  s     !\5kBr   __main__z6Polish Real Estate Mappings - JSON-based ConfigurationzF======================================================================T)r   zConfiguration file: z	Version: r.   zLast updated: r   zMaintainer: r   u   
📊 Statistics:r   u     • _ r0   u#   
🏙️  Sample cities (first 10):
   u   
🏠 Boolean features:z	 patternsu$   
✓ Mappings validated successfullyu   
✗ Error:    )NF)+r   r2   ospathlibr   typingr   r   r   r   r   logging	getLoggerr   r4   r
   r   __annotations__r   r   r   MAPPINGSsysprintmappingsr   r6   ro   r{   r|   replacetitler   rc   cityr   featurer   rk   countexitr:   r?   r   r   r   <module>r      s-  
  	  1 1  
		8	$i
 i
b	 :> H56 =hsm $ Sk ( > z8	
BC	(OT2'')$T-%8$9:;	$y/*+,tN3456T,/012h"#w---/JCF3;;sC06689E7CD 0 	45//0"5DF4&/" 6 	()446G11':;EF7)2eWI67 7 	56? B  aS!"s   ;D0F- -G2GG