
    Ih0                        d Z ddlmZ ddlZddlZddlmZmZmZm	Z	m
Z
mZ ddlmZ d ZddZddZdd	Zddddd
dd	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 	 ddZddZddddd
d	 	 	 	 	 	 	 	 	 	 	 	 	 ddZy)ud  
Inactive rules evaluator (NEW + LEGACY) z trace.

Obsługiwane warunki (pojedynczy dict):
  NEW:
    - selector_exists: "css"
    - selector_missing: "css"
    - selector_text_equals: { "selector": "css", "text": "..." }
    - selector_contains: { "selector": "css", "text": "..." }
    - selector_exists_any: ["css1", "css2", ...]
    - selector_missing_all: ["css1", "css2", ...]
    - text_contains: "..." | ["...", "..."]
    - text_contains_any: ["...", "..."]
    - field_empty: "name"          (czyta z `extracted`)
    - field_missing: "name"        (czyta z `extracted`)
    - url_contains: "..."          (czyta z param. `url`)
    - http_status_in: [404, 410]   (czyta z param. `http_status`)
    - redirect_url_contains_any: ["nie-znaleziono","ad-not-available"] (czyta z `redirects`|`url`)
    - redirected: true/false       (czyta z `redirects`)

  LEGACY (pojedynczy dict z "type"):
    - { "type": "text_contains", "text": "..." }
    - { "type": "selector_missing", "selector": "..." }
    - { "type": "selector_contains", "selector": "...", "text": "..." }
    - { "type": "selector_text", "selector": "...", "text": "..." }
    - { "type": "source_field_match", "field": "...", "match": "..." }

Top-level:
  - NOWY: { "when": { "all":[...], "any":[...], "none":[...] }, "reason":"..." }
  - LEGACY: pojedynczy condition-dict (traktowany jak NEW z any=[...]).
    )annotationsN)AnyDictIterableListOptionalTuple)BeautifulSoupc                R    | g S t        | t        t        f      rt        |       S | gS N)
isinstancelisttuple)xs    :/var/www/extractly/link_agregator/check_active/inactive.py_as_listr   -   s*    y	!dE]#Aw3J    c                    | xs dj                         } t        j                  d|       } dj                  d | D              } dj                  | j	                               S )N NFKDc              3  L   K   | ]  }t        j                  |      r|  y wr   )unicodedata	combining).0chs     r   	<genexpr>z#_normalize_ascii.<locals>.<genexpr>8   s     @Qrk&;&;B&?Qs   $$ )lowerr   	normalizejoinsplit)ss    r   _normalize_asciir#   5   sM    	
bAfa(A
@Q@@A88AGGIr   c                    | syt        |       j                         }t        |      }|D ].  }|xs dj                         }|s||v r yt        |      |v s. y y)NFr   T)strr   r#   )haystackneedleslowlow_normnn_lows         r   _text_containsr,   <   sc    
h-


C$Hb!C<E"h.  r   c                b    	 t        | xs dd      S # t        $ r t        | xs dd      cY S w xY w)Nr   lxmlzhtml.parser)r
   	Exception)htmls    r   
_safe_soupr1   L   s8    8TZR00 8TZR778s    ..Furlhttp_status	redirects	extractedtracedebugc               6   ||ng }	d| v rI| j                  d      }
t        |
xr j                  |
            }|r|r|	j                  d|
        |S d| v rK| j                  d      }
t        |
      xr j                  |
      d u }|r|r|	j                  d|
        |S d| v r| j                  d      xs i }|j                  d      }
|j                  d      xs dj	                         j                         }|
rj                  |
      nd }t        |xr2 |j                  d	d
      j	                         j                         |k(        }|r|r|	j                  d|
 d| d       |S d| v rt        | d   t              r| d   }|j                  d      }
|j                  d      xs dj                         }|
rj                  |
      nd }t        |xr# ||j                  d	d
      j                         v       }|r|r|	j                  d|
 d| d       |S d| v rNt        | j                  d            }|D ].  }	 |r)j                  |      r|r|	j                  d|         y
0 yd| v rHt        | j                  d            }t        fd|D              }|r|r|	j                  d|        |S d| v r@t        | j                  d            }t        ||      }|r|r|	j                  d|        |S d| v r@t        | j                  d            }t        ||      }|r|r|	j                  d|        |S d| v r|y| j                  d      }|j                  |      }|d u xs@ t        |t              xr |j	                          xs t        |t        t        f      xr | }|r|r|	j                  d|        |S d | v rA|y| j                  d       }|j                  |      d u }|r|r|	j                  d!|        |S d"| v rc| j                  d"      xs dj                         }t        |xr |xr ||j                         v       }|r|r|	j                  d#| d$|        |S d%| v r<t        | j                  d%            }|||v nd}|r|r|	j                  d&|        |S d'| v rSt        | j                  d'            }t        |xr t        |      d(kD        }||u }|r|r|	j                  d)|        |S d*| v rpt        | j                  d*            }t        |xs g       }|r|j                  |       |D ].  t!        fd+|D              s|r|	j                  d,         y
 y| j                  d-      }|dk(  rMt        |t        | j                  d                  }|r%|r#|	j                  d.| j                  d              |S |dk(  rK| j                  d      }
t        |
      xr j                  |
      d u }|r|r|	j                  d/|
        |S |dk(  r| j                  d      }
| j                  d      xs dj                         }|
rj                  |
      nd }t        |xr# ||j                  d	d
      j                         v       }|r|r|	j                  d0|
 d| d       |S |d1k(  r| j                  d      }
| j                  d      xs dj	                         j                         }|
rj                  |
      nd }t        |xr2 |j                  d	d
      j	                         j                         |k(        }|r|r|	j                  d2|
 d| d       |S |d3k(  r| j                  d4      }| j                  d5      xs dj                         }d }	 t#        ||d      }	 t        |t              rt%        j&                  |d6      }t        |xr |t        |      j                         v       }|r|r|	j                  d7| d| d       |S y# t        $ r$}|r|	j                  d| d|        Y d }~.d }~ww xY w# t        $ r d}Y w xY w# t        $ r Y w xY w)8Nselector_existsz[when.selector_exists] selector_missingz[when.selector_missing] selector_text_equalsselectortextr   r   T)stripz[when.selector_text_equals] z == ''selector_containsz[when.selector_contains] z ~ 'selector_exists_anyz[when.selector_exists_any] z![when.selector_exists_any] error z: Fselector_missing_allc              3  L   K   | ]  }|xr j                  |      d u   y wr   )
select_one)r   r"   soups     r   r   z_cond_ok.<locals>.<genexpr>   s(     BT3*d23Ts   !$z[when.selector_missing_all] text_containsz[when.text_contains] text_contains_anyz[when.text_contains_any] field_emptyz[when.field_empty] field_missingz[when.field_missing] url_containsz[when.url_contains] 'z' in http_status_inz[when.http_status_in] 
redirected   z[when.redirected] redirect_url_contains_anyc              3  n   K   | ],  }|xs d j                         xs d j                         v  . yw)r   N)r   )r   phus     r   r   z_cond_ok.<locals>.<genexpr>   s/     MWrBH"##%!'r)::Ws   25z([when.redirect_url_contains_any] hit in typez[legacy.text_contains] z[legacy.selector_missing] z[legacy.selector_contains] selector_textz[legacy.selector_text] source_field_matchfieldmatch)ensure_asciiz[legacy.source_field_match] )getboolrE   appendr?   r   get_textr   dictr   r/   allr,   r%   r   lenanygetattrjsondumps)condrF   r0   r3   r4   r5   r6   r7   r8   dbgselokcfgtxtelselsr"   er'   keyvalfragcodeswantredirected_nowurls_to_checktrV   rW   valuerR   s    `                            @r   _cond_okrv   U   s    $%"C D hh()#.$//#./RJJ067	T!hh)*#Y9DOOC0D8RJJ1#78	%hh-.4"ggj!wwv$"++-335%(T__S!d"LS5;;=CCELMRJJ5cU%uAFG	d"z$7J2KT'R&'ggj!wwv$"++-%(T__S!d"DCt < B B DDERJJ23%tC5BC	$!678AM+

%@#DE  %!789BTBBRJJ5dV<=	$488O45D'*RJJ.wi89	d"488$789D'*RJJ27)<=	hh}%mmC Tkwz#s3GCIIKwZX[^bdh]iMjMvsvovRJJ,SE23	$hh']]34'RJJ.se45	(.B557$6364399;#67RJJ.tfE#?@	4"234%0%<[E!%RJJ/}=>	tDHH\*+i>C	NQ,>?$RJJ+N+;<=	"d*488$?@AY_"-  %AMWMMJJ!I!MN	 
  	AOD(488F+;"<=RJJ0&1A0BCD	hhz"#Y9DOOC0D8RJJ3C59:	hhz"xx%2,,.%(T__S!d"DCt < B B DDERJJ4SEcU!DE	Ohhz"xx%2,,.446%(T__S!d"LS5;;=CCELMRJJ0U3%qAB	  !'"(b//1	C+E	%&

5u= %7ESZ%5%5%778RJJ5eWDqIJ	I  MJJ!B1#RsKLMp  	E	
  		s<   	)aa; 'b 	a8a33a8;b	b		bbc                    | sg S t        | t              r	 t        j                  |       }n| }t        |t
              r|gS t        |t        t        f      rt        |      S g S # t        $ r g cY S w xY wr   )r   r%   rb   loadsr/   r]   r   r   )rulesrn   s     r   _normalize_rulesrz     sr    	%	**U#C #tu#e}%CyI  	I	s   A$ $A21A2)r3   r4   r5   r6   r7   c                    xs d t        |      }g t        |      D ]  \  }}	t        |	t              s|	j	                  d      }
t        |
t              rt               t         fdt        |
j	                  d            D              }t        |
j	                  d            }|sdnt         fd|D              }t         fdt        |
j	                  d	            D               }|rT|rR|rP|	j	                  d
      xs |	j	                  d      xs d| d}rj                  d| d       d|fc S d|fc S =t               t        |	 	      s^|	j	                  d
      xs |	j	                  d      xs d}rj                  d| d       d|fc S d|fc S  d}rg |S |S )z
    Zwraca:
      - gdy trace=False: (is_inactive, reason)
      - gdy trace=True:  (is_inactive, reason, debug:list[str])
    r   whenc              3  F   K   | ]  }t        |	 	        ywr2   Nrv   
r   cr8   r6   r0   r4   r5   rF   r7   r3   s
     r   r   zis_inactive.<locals>.<genexpr>C  s;       3A D$C[T]#,EH H2   !r^   r`   Tc              3  F   K   | ]  }t        |	 	        ywr~   r   r   s
     r   r   zis_inactive.<locals>.<genexpr>I  s;      3 "A D$C[T]#,EH H!r   c              3  F   K   | ]  }t        |	 	        ywr~   r   r   s
     r   r   zis_inactive.<locals>.<genexpr>N  s;       4A D$C[T]#,EH H3r   nonereasonnamerule__matchz[when] matched => reason='r@   r2   rS   legacy_rule_matchz[legacy] matched => reason=')Fu   Brak oznak nieaktywności)rz   	enumerater   r]   rY   r1   r^   r   r`   r[   rv   )r0   inactive_configr3   r4   r5   r6   r7   
rules_listidxruler|   all_okany_listany_oknone_okr   outr8   rF   s   ` `````          @@r   is_inactiver   (  s    :2D!/2JEz*	T$% xxdD!d#D   "$((5/2 F
  0H!)Ts 3 3 "3 0F
    "$((6"23  G
 &W(+Vtxx/?VU3%vCVLL#=fXQ!GH..V|# $D$#;R['uECXXh'R488F+;R?RF;F81EFVU**<Q +T /C!=S=%=*s*r   )r"   r%   returnr%   )r&   Optional[str]r'   zIterable[str]r   rZ   )r0   z
str | Noner   r
   )rd   zDict[str, Any]rF   r
   r0   r%   r3   r   r4   Optional[int]r5   Optional[List[str]]r6   Optional[dict]r7   rZ   r8   zList[str] | Noner   rZ   )r   r   )r0   r%   r3   r   r4   r   r5   r   r6   r   r7   rZ   r   z.Tuple[bool, str] | Tuple[bool, str, List[str]])__doc__
__future__r   rb   r   typingr   r   r   r   r   r	   bs4r
   r   r#   r,   r1   rv   rz   r    r   r   <module>r      s  @ #   = = 
 8 !%%) $"}
}
} }
 
} } #} } } } 
}@. !%%) $>+
>+ 
	>+
 >+ #>+ >+ >+ 4>+r   