
    Ih                         d Z ddlZddlmZmZmZmZmZ ddlZddl	m
Z
 d ZdedefdZd	ee   d
ee   defdZ	 	 ddeeef   de
dedee   def
dZdefdZ	 	 ddede
dee   deeee   f   fdZy)ap  
Inactive rules evaluator.

Supported NEW-style conditions inside a single predicate dict:
  - selector_exists: "css"
  - selector_missing: "css"
  - selector_text_equals: { "selector": "css", "text": "..." }
  - selector_contains: { "selector": "css", "text": "..." }
  - selector_exists_any: ["css1", "css2", ...]
  - selector_missing_all: ["css1", "css2", ...]
  - text_contains: "..." | ["...", "..."]
  - text_contains_any: ["...", "..."]
  - field_empty: "name"        (reads from `extracted`)
  - field_missing: "name"      (reads from `extracted`)
  - url_contains: "..."        (reads from page.url)

Legacy compatibility (single dict with "type"):
  - { "type": "text_contains", "text": "..." }
  - { "type": "selector_missing", "selector": "..." }
  - { "type": "selector_contains", "selector": "...", "text": "..." }
  - { "type": "selector_text", "selector": "...", "text": "..." }
  - { "type": "source_field_match", "field": "...", "match": "..." }

Top-level rule styles:
  - NEW: { "when": { "all":[...], "any":[...], "none":[...] }, "reason":"..." }
  - LEGACY single condition dict (treated like NEW with any=[...]).
    N)AnyDictIterableTupleOptional)BeautifulSoupc                 R    | g S t        | t        t        f      rt        |       S | gS N)
isinstancelisttuple)xs    6/var/www/extractly/manual_agregator/parser/inactive.py_as_listr   $   s*    y	!dE]#Aw3J    sreturnc                     | xs dj                         } t        j                  d|       } dj                  d | D              } dj                  | j	                               S )zBLowercase, strip diacritics, collapse whitespace to single spaces. NFKDc              3   L   K   | ]  }t        j                  |      r|  y wr
   )unicodedata	combining).0chs     r   	<genexpr>z#_normalize_ascii.<locals>.<genexpr>1   s     @Qrk&;&;B&?Qs   $$ )lowerr   	normalizejoinsplit)r   s    r   _normalize_asciir"   ,   sM    	
bAfa(A
@Q@@A88AGGIr   haystackneedlesc                     | syt        |       j                         }t        |      }|D ].  }|xs dj                         }|s||v r yt        |      |v s. y y)NFr   T)strr   r"   )r#   r$   lowlow_normnn_lows         r   _text_containsr+   6   sc    
h-


C$Hb!C<E"h.  r   condsouphtml	extractedc                    d| v r/| j                  d      }t        |xr j                  |            S d| v r1| j                  d      }t        |      xr j                  |      du S d| v r| j                  d      xs i }|j                  d      }|j                  d      xs dj                         j	                         }|rj                  |      nd}t        |xr2 |j                  dd	
      j                         j	                         |k(        S d| v rt        | d   t              r~| d   }|j                  d      }|j                  d      xs dj	                         }|rj                  |      nd}t        |xr# ||j                  dd	
      j	                         v       S d| v r.t        | j                  d            }	t        fd|	D              S d| v r.t        | j                  d            }	t        fd|	D              S d| v r&t        | j                  d            }
t        ||
      S d| v r&t        | j                  d            }
t        ||
      S d| v rf|y| j                  d      }|j                  |      }|y	t        |t              r|j                         dk(  ry	t        |t        t        f      r|sy	yd| v r'|y| j                  d      }|j                  |      du S d| v rS|Q| j                  d      xs dj	                         }t        |xr! |t        |dd      xs dj	                         v       S | j                  d      }|dk(  r$t        |t        | j                  d                  S |dk(  r1| j                  d      }t        |      xr j                  |      du S |dk(  ry| j                  d      }| j                  d      xs dj	                         }|rj                  |      nd}t        |xr# ||j                  dd	
      j	                         v       S |dk(  r| j                  d      }| j                  d      xs dj                         j	                         }|rj                  |      nd}t        |xr2 |j                  dd	
      j                         j	                         |k(        S |dk(  r|| j                  d      }| j                  d      xs dj	                         }t        ||d      }	 t        |t              rt        j                   |d      }t        |xr |t        |      j	                         v       S y# t"        $ r Y 4w xY w)z!Evaluate a single condition dict.selector_existsselector_missingNselector_text_equalsselectortextr   r   T)stripselector_containsselector_exists_anyc              3   H   K   | ]  }|xr j                  |        y wr
   
select_oner   r   r-   s     r   r   z_cond_ok.<locals>.<genexpr>k   s#     :T1+++Ts   "selector_missing_allc              3   L   K   | ]  }|xr j                  |      d u   y wr
   r:   r<   s     r   r   z_cond_ok.<locals>.<genexpr>o   s(     Dt!15$//!,45ts   !$text_containstext_contains_anyfield_emptyFfield_missingurl_containsurltypeselector_textsource_field_matchfieldmatch)ensure_ascii)getboolr;   r6   r   get_textr   dictr   anyallr+   r&   r   getattrjsondumps	Exception)r,   r-   r.   pager/   selcfgtxtelselsr$   keyvalfragtrH   rI   values    `                r   _cond_okr`   H   s    D hh()C0DOOC011T!hh)*Cy;dooc2d:;%hh-.4"ggj!wwv$"++-335%(T__S!dBN2;;s$;7==?EEG3NOOd"z$7J2KT'R&'ggj!wwv$"++-%(T__S!dBF3"++c+">"D"D"FFGG $!678:T:::%!789DtDDD$488O45dG,,d"488$789dG,,hh}%mmC ;c3CIIK2$5cD$<($hh'}}S!T))$"2(.B557DMTgdE2&>&D"%K%K%MMNN 	AOdHTXXf-=$>??hhz"Cy;dooc2d:;hhz"xx%2,,.%(T__S!dBF3"++c+">"D"D"FFGGOhhz"xx%2,,.446%(T__S!dBN2;;s$;7==?EEG3NOO  T%5!'"(b//1eR(	%&

5u= E9es5z'7'7'99::	  		s   $'U4 4	V ?V c                     | sg S t        | t              r	 t        j                  |       }n| }t        |t
              r|gS t        |t        t        f      rt        |      S g S # t        $ r g cY S w xY w)z)Normalize rules input to a list of dicts.)r   r&   rR   loadsrT   rN   r   r   )rulesr\   s     r   _normalize_rulesrd      sr    	%	**U#C #tu#e}%CyI  	I	s   A$ $A21A2c           	           xs d t        |      }|D ];  }t        |t              s|j                  d      }t        |t              rt	         fdt        |j                  d            D              }t        |j                  d            }	|	sdnt         fd|	D              }
t         fdt        |j                  d	            D               }|r2|
r0|r.|j                  d
      xs |j                  d      xs d}d|fc S t        |       s|j                  d
      xs |j                  d      xs d}d|fc S  y)a  
    Returns (inactive_bool, reason_str).

    NEW style rule:
      { "when": { "all":[...], "any":[...], "none":[...] }, "reason": "..." }

    LEGACY style: a single condition dict is treated as NEW with `any=[...]`.

    Empty rules -> (False, None).
    r   whenc              3   <   K   | ]  }t        |        y wr
   r`   r   cr/   r.   rU   r-   s     r   r   zis_inactive.<locals>.<genexpr>   s!     eKda!T4yAKd   rP   rO   Tc              3   <   K   | ]  }t        |        y wr
   rh   ri   s     r   r   zis_inactive.<locals>.<genexpr>   s#     2nem`a8AtT4QZ3[emrk   c              3   <   K   | ]  }t        |        y wr
   rh   ri   s     r   r   zis_inactive.<locals>.<genexpr>   s!     kPj1hq$dIFPjrk   nonereasonname
rule_matchrE   legacy_rule_match)FN)rd   r   rN   rK   rP   r   rO   r`   )r.   r-   rc   rU   r/   
rules_listrulerf   all_okany_listany_oknone_okro   s   `` ``        r   is_inactivery      s.   " :2D!%(J$%xxdD!e8TXT\T\]bTcKdeeF0H!)Ts2nem2n/nFkPXY]YaYabhYiPjkkkG&W(+Otxx/?O<V|# D$dI6XXh'R488F+;R?RF<' * r   )NN)__doc__rR   typingr   r   r   r   r   r   bs4r   r   r&   r"   rL   r+   rN   r`   r   rd   ry    r   r   <module>r~      s   8  7 7    Xc] Xc] t , 
 $d
sCx.d
d d
 ~d 
dNt , 
 $)
)
)
 ~) 4#)r   