
    h                        d Z ddlmZ ddlZddlmZ ddZddZddZddZ	dd	Z
dd
ZddZeeee	e
eedZddZddZdddZy)u  
Elastyczne reguły wyboru gałęzi selektorów i ustawiania pól.

Predykaty (możesz używać w "when"):
- selector_exists:   "#css"
- selector_missing:  "#css"
- selector_contains: {"selector":"#css", "text":"fragment"}
- text_contains:     "fragment"
- url_matches:       "regex"
- field_empty:       "price"         (tylko w fazie POST – po wstępnym parsowaniu)
- field_equals:      {"field":"...", "value": ...}  (tylko w fazie POST)

Grupowanie:
when = { "all":[...], "any":[...], "none":[...] }

Działania:
- skrót:  {"type": "otodom_v2", "when": {...}}
- pełne:  {"when": {...}, "then": {"set_type": "otodom_v2", "set": {"k":"v"}}}

Zwraca (set_type:str|None, set_fields:dict)
    )annotationsN)BeautifulSoupc                    t        | t              r| n| j                  d      }t        |xr |j	                  |            S Nselector
isinstancestrgetbool
select_onepsoup_sels       3/var/www/extractly/manual_agregator/parser/rules.py_pred_selector_existsr       s4    !S!!quuZ'8C,,--    c                    t        | t              r| n| j                  d      }t        |xr |j	                  |             S r   r   r   s       r   _pred_selector_missingr   $   s7    !S!!quuZ'8C0DOOC0011r   c                :   t        | t              sy| j                  d      }| j                  d      xs dj                         j	                         }|r|j                  |      nd }t        |xr' |xr# ||j                  dd      j	                         v       S )NFr   text  Tstrip)r	   dictr   r   lowerr   r   get_text)r   r   r   r   txtels         r   _pred_selector_containsr#   (   s    au
%%

CAEE&M$7R#>#>#@#F#F#HS!$	$BJsJsbkk#Tk&B&H&H&JJKKr   c                    | xs dj                         j                         }t        |xr# ||j                  dd      j                         v       S )Nr   r   Tr   )r   r   r   r    )r   r   r   r!   s       r   _pred_text_containsr%   .   sF    7//

!
!
#CEt}}S}=CCEEFFr   c                    t        | t              r| n| j                  d      }t        |dd      xs d}	 t	        |xr t        j                  ||            S # t
        j                  $ r Y yw xY w)Npatternurlr   F)r	   r
   r   getattrr   researcherror)r   pager   r'   r(   s        r   _pred_url_matchesr.   2   sd    a%a155+;G
$r
"
(bCG7		'3 78888 s   "A A/.A/c                    t        | t              r| n| j                  d      }|j                  |d       }|d dg i ddfv S )Nfieldr   z{}z[])r	   r
   r   )r   	extractedr   keyvs        r   _pred_field_emptyr4   :   sB    !S!!quuW~Cc4 Ar2r4...r   c                    t        | t              sy| j                  d      }| j                  d      }|j                  |d       |k(  S )NFr0   value)r	   r   r   )r   r1   r   r2   vals        r   _pred_field_equalsr8   ?   s=    au
%%.Cg#==d#s**r   )selector_existsselector_missingselector_containstext_containsurl_matchesfield_emptyfield_equalsc                    t        | t              rt        |       dk7  ryt        t	        | j                                     \  }}t        j                  |      }|sy|dk(  r|dv ryt         |||||            S )N   Fpre)r>   r?   )r   r-   r1   )	r	   r   lennextiteritems
PREDICATESr   r   )predr   r-   r1   phasenameargfns           r   	_eval_onerM   N   sn    dD!SY!^T$**,'(ID#		B~$"AA3T	BCCr   c                   | syt        fd| j                  dg       D              }| j                  dg       }|rt        fd|D              nd}t        fd| j                  dg       D               }|xr |xr |S )NTc              3  <   K   | ]  }t        |        y wNrM   .0r   r1   r-   rI   r   s     r   	<genexpr>z_eval_when.<locals>.<genexpr>]   s!     YEX1dD)U;EX   allanyc              3  <   K   | ]  }t        |        y wrP   rQ   rR   s     r   rT   z_eval_when.<locals>.<genexpr>_   s     NX1dD)U;XrU   c              3  <   K   | ]  }t        |        y wrP   rQ   rR   s     r   rT   z_eval_when.<locals>.<genexpr>`   s!     _J^Qi4y%@J^rU   none)rV   r   rW   )	whenr   r-   r1   rI   ok_allany_listok_anyok_nones	    ````    r   
_eval_whenr`   Z   sz    YTXXeUWEXYYFxxr"HRZSNXNN`dF_$((SY[]J^___G(f((r   c                   t        | t        t        f      sdi fS | D ]  }t        |t              s|j	                  d      xs i }t        |||||      s9|j	                  d      }|j	                  d      xs i }|j	                  d      r|d   }t        |j	                  d      t              r|j	                  d      ni }	||	fc S  di fS )zp
    rules: list[dict] (patrz docstring)
    phase: "pre" | "post"
    Zwraca (override_type, extra_fields)
    Nr[   typethenset_typeset)r	   listtupler   r   r`   )
rulesr   r-   r1   rI   ruler[   rd   rc   
set_fieldss
             r   apply_dynamic_rulesrk   c   s     edE]+Rx$%xx%2$dIu=88F#xx%288JJ'H(2488E?D(ITXXe_r
##   8Or   )r   r   rP   )r1   r   )rH   r   rI   r
   )r[   zdict | NonerI   r
   )rB   )rI   r
   )__doc__
__future__r   r*   bs4r   r   r   r#   r%   r.   r4   r8   rG   rM   r`   rk    r   r   <module>rp      sh   , # 	 .2LG/
+ //0,**+

D)r   