
    h                         d Z ddlmZ ddlZddlZddlZddlmZ ddlmZm	Z	m
Z
mZmZ ddlmZ  ej                  e      ZddZdd	Zdd
ZddZddd	 	 	 	 	 	 	 ddZdd	 	 	 	 	 	 	 	 	 ddZy)a  
Utilities for copying (transferring) fields from NetworkMonitoredPage
into the parser's extracted dict, based on a mapping stored on ManualDataSource.

Config lives on ManualDataSource as JSON in field "transferred" (preferred)
or legacy "trasferred". Example:

{
  "transferred": {
    "listing_url": "url",
    "source_id": "source_id",
    "source_name": "source.title",
    "offer_kind": "offer_type",
    "estate_kind": "estate_type",
    "inactive_reason": "meta.inactive_reason"
  }
}

Integration (in parse_manual_data):
    from manual_agregator.parser.transferred import apply_transferred, compute_stats

    # ... after selectors parsing + post-rules:
    transferred_keys = apply_transferred(page, manual_config, extracted, skip_if_present=True)

    # when computing stats, exclude transferred keys so stats reflect only selector-extracted fields:
    extracted["_stats"] = compute_stats(selectors, extracted, selected_type, extra_exclude=set(transferred_keys))
    )annotationsN)datetime)AnyDictIterableListTuple   )value_is_emptyc                    | si S t        | t              r| S t        | t              r*	 t        j                  |       }t        |t              r|S i S i S # t
        $ r t        j                  d       i cY S w xY w)z^Return dict from raw if it's already a dict; if it's a JSON string try to parse; otherwise {}.z9transferred: failed to json-load string mapping; ignoring)
isinstancedictstrjsonloads	Exceptionloggerdebug)rawparseds     9/var/www/extractly/manual_agregator/parser/transferred.py_parse_json_if_strr   -   ss    	#t
#s	ZZ_F'56=2= I  	LLTUI	s   &A A  A65A6c                X    t        | dd      xs t        | dd      xs i }t        |      S )zy
    Prefer manual_config.transferred, fall back to legacy 'trasferred'.
    Always returns a dict (possibly empty).
    transferredN
trasferred)getattrr   )manual_configr   s     r   get_transferred_mapr   =   s3     -
5
iP\^b9c
igiCc""    c                    |sy| }t        |      j                  d      D ]5  }| yt        |t              r|j	                  |      })t        ||d      }7 |S )z
    Dot-path getter. Works with object attributes and dict keys.
    Example: deep_get(page, "source.title") or deep_get(page, "meta.inactive_reason")
    N.)r   splitr   r   getr   )objpathcurparts       r   deep_getr(   G   sZ    
 CD	$;c4 ''$-C#tT*C % Jr   c                ~   t        | t        j                        rt        |       S t        | t              r| j                         S t        | t              r-| j                         D ci c]  \  }}|t        |       c}}S t        | t        t        f      r| D cg c]  }t        |       c}S | S c c}}w c c}w )zR
    Make common Django/py types JSON-serializable for json.dumps(extracted).
    )r   uuidUUIDr   r   	isoformatr   itemsto_serializablelisttuple)valkvxs       r   r.   r.   Y   s     #tyy!3x#x }}#t25))+>+$!Q?1%%+>>#e}%,/0Cq"C00J ?0s   )B4B:TF)skip_if_present	overwritec               J   t        |      }|sg S g }|j                         D ]W  \  }}	 t        | |      }	|	|s!|r||v rt	        |j                  |            s9t        |	      ||<   |j                  |       Y |rt        j                  d|       |S # t        $ r Y w xY w)at  
    Copy fields from NetworkMonitoredPage `page` into `extracted` according to mapping
    defined on ManualDataSource.transferred (or legacy trasferred).

    Args:
        page: NetworkMonitoredPage instance.
        manual_config: ManualDataSource instance.
        extracted: dict to update.
        skip_if_present (default True): do NOT overwrite keys that already have a non-empty value in `extracted`.
        overwrite (default False): hard-overwrite regardless of existing value (wins over skip_if_present).

    Returns:
        List of destination keys that were written (so the caller can exclude from stats).
    ztransferred: wrote keys %s)
r   r-   r(   r   r   r#   r.   appendr   r   )
pager   	extractedr5   r6   mappingwrittendestsrc_pathr1   s
             r   apply_transferredr?   h   s    , "-0G	G!--/h	4*C
 ;_y 	d8K)L)#.	$t! *$ 17;N%  		s   B	B"!B")extra_excludec                  dh}|r|t        |      z  }t        | xs i j                         D cg c]	  }||vs| c}      }|j                         D cg c]  \  }}||vst	        |      r| }}}| xs i j                         D cg c]$  }||vst	        |j                  |            s#|& }	}t        |      |||	|xs ddS c c}w c c}}w c c}w )z
    Compute stats for extracted fields, excluding internal keys and optionally transferred keys.
    Use this instead of ad-hoc stats if you want transferred keys excluded.
    _statsdefault)found_fieldstotal_fields
found_keysmissing_keys	type_used)setlenkeysr-   r   r#   )
	selectorsr:   selected_typer@   excluder2   rE   r3   rF   rG   s
             r   compute_statsrO      s     jG3}%%IO#9#9#;P#;aq?O#;PQL ) 1` 11Qg5En]^N_! 1J` )R557q7!1G;KP^_h_l_lmn_oPpA7Lq J$ $"/i 	 Q`qs.   	CCC$C0C	CC2C)returnr   )r$   r   r%   r   rP   r   )r1   r   rP   r   )r:   Dict[str, Any]r5   boolr6   rR   rP   z	List[str])
rL   rQ   r:   rQ   rM   z
str | Noner@   zIterable[str] | NonerP   rQ   )__doc__
__future__r   r   loggingr*   r   typingr   r   r   r   r	   utilsr   	getLogger__name__r   r   r   r(   r.   r?   rO    r   r   <module>r[      s   8 #     3 3 !			8	$ #$( !0 0
 0 0 0p +/ 
 ( r   