
    Ih                     (   d dl Z d dlmZ d dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d d	lmZ d d
lmZ d dlmZ d dlmZ dZddddZ	 	 dddddddedz  dededz  dededz  f
dZ	 	 ddddddddedededz  dedz  dededz  fdZy)    N)shared_task)urlparse)sync_to_async)async_playwright)
SourceHtml)get_flagged_pages)propagate_from_page)handle_cookies)logger)is_inactive)status_only_modea  
Object.defineProperty(Navigator.prototype, 'webdriver', { get: () => undefined });
window.chrome = window.chrome || { runtime: {} };
Object.defineProperty(navigator, 'languages', { get: () => ['pl-PL','pl'] });
Object.defineProperty(navigator, 'plugins',  { get: () => [1,2,3,4,5] });
Theadlessrun_actionsc          
        K   d}d}d\  }}| D ]  }	  t        t        j                  j                  |j                        j
                                d{   }	|	s)t        j                  d|j                   d       |dz  }zt        |j                        }
|
j                   d|
j                   d	}||k7  s|sB|j                  |d
       d{    t        |t        |di       xs i        d{    |d}}|j                  |j                  d
       d{   }|r|j                  nd}|j                  }|r-|	j                   r!ddlm}  |||	j                          d{    |j'                          d{   }|	j(                  xs g }t+        |||||gd      \  }}}|rdnd}t        j,                  d| d|j                   d|        |xs g D ]  }t        j,                  d|          t        t.              || |       d{    |dz  } ||fS 7 7 [7 ;7 7 7 7  # t0        $ r6}t        j                  dt        |dd       d|        |dz  }Y d}~ld}~ww xY ww)u   
    Przetwarza przekazaną listę NetworkMonitoredPage w kontekście istniejącej karty `page`.
    Zwraca: (processed_ok:int, processed_err:int)
    NF)r   r   )	source_idz)[STATUS_ONLY] Brak SourceHtml dla source=z
; pomijam.   z:///i$ )timeoutmetaTr   )run_actions_on_page)htmlinactive_configurlhttp_status	redirectstraceINACTIVEACTIVEz[STATUS_ONLY] z :: url=z :: reason=z[STATUS_ONLY][TRACE] )	is_activereasonu   [STATUS_ONLY] Błąd dla r   ?: )r   r   objectsfilterr   firstr   warningr   r   schemenetlocgotor
   getattrstatusactionshtml_agregator.utils.actionsr   contentinactiver   infor	   	Exception)pagespager   r   	base_hostcookies_readyokerrobjhtml_cfgparsedhostresponser   	final_urlr   r   inactive_cfgr   r!   debugstatelineexcs                           ;/var/www/extractly/link_agregator/check_active/run_check.py_process_pages_batchrE      s    
 IMGB4	d]:+=+=+D+Ds}}+D+]+c+cdffH!J3==/Ycdeq cgg&Fmm_Ca8Dy iiei444$T73+C+IrJJJ+/=	 "YYswwY>>H-5(//4KI x//L)$0@0@AAA 'D#,,2L)1 ,'$+*&K #.J8EKK.xyF8TU"3D6:; % 5- 34SO\bccc!GBa n s7Ni g 5J ? B ($ d  	NN6wsE37O6PPRSVRWXY1HC	s   JAI H..I JAI H1"I 9H4:(I "H7#A
I -H:.I H<BI H>	I 'J.I 1I 4I 7I :I <I >I  	I?	+I:4J:I??Jid)r   source_namer   order_bylimitr   rG   r   rH   c                F  K   t               5  t        | |||      } t        t              |       d {   }|s"t	        j
                  d       ddicd d d        S t	        j
                  dt        |              t               4 d {   }|j                  j                  |g d       d {   }	|	j                  dd	d
dddddd       d {   }
|
j                  t               d {    |
j                          d {   }t        ||||       d {   \  }}|	j                          d {    ||dcd d d       d {    cd d d        S 7 .7 7 7 7 v7 `7 K7 27 # 1 d {  7  sw Y   nxY w	 d d d        y # 1 sw Y   y xY ww)NrI   r   rG   rH   u,   [STATUS_ONLY] Brak rekordów do sprawdzenia.checkedr   z[STATUS_ONLY] Do sprawdzenia: z---disable-blink-features=AutomationControlledz--no-sandboxz--disable-dev-shm-usager   argsoMozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36pl-PLEurope/WarsawV     widthheightpl-PL,pl;q=0.9,en;q=0.81zAccept-LanguagezUpgrade-Insecure-Requests
user_agentlocaletimezone_idviewportextra_http_headersr   )rL   errors)r   r   r   listr   r1   lenr   chromiumlaunchnew_contextadd_init_scriptINIT_STEALTHnew_pagerE   close)rI   r   r   rG   r   rH   pages_qsr3   pbrowsercontextr4   r7   r8   s                 rD   run_status_only_checksro   ]   s     
	$#	
 *mD)(33KKFGq> 
	 	4SZLAB#%%JJ--!q .  G $//Q+#'377Por#s 0  G )),777 ))++D0x]hiiGB --/!!!S1) &%% 
	 4 & 8+i "' &%%%% 
		s   F!(FE#F
F!/FE&F#E64E(5#E6E*E65E,6E6E.E6$E0%E6?E2 E6FE4F
F!#F&F(E6*E6,E6.E60E62E64F6F	<E?=F	F	F!FF!)r   rG   max_batchesr   rH   
batch_sizerp   c                f  K   t               5  dx}}d}	t               }
t               4 d{   }|j                  j	                  |g d       d{   }|j                  ddddd	d
ddd       d{   }|j                  t               d{    |j                          d{   }	 ||	|k\  rt        j                  d| d       nt        | |||      } t        t              |       d{   }|st        j                  d       n|D ch c]  }|j                   }}|j                  |
      r"t        j                   dt#        |              nXt        j                  d|	dz    dt#        |       d       t%        ||||       d{   \  }}||z  }||z  }|	dz  }	|
|z  }
	|j'                          d{    ddd      d{    d|	||dcddd       S 7 7 7 7 h7 S7 c c}w 7 l7 =7 /# 1 d{  7  sw Y   ?xY w# 1 sw Y   yxY ww)u   
    Pipeline: mieli wszystkie oflagowane rekordy w partiach po `batch_size`.
    Kończy gdy:
      - nie ma już nic do pobrania, albo
      - brak postępu (ciągle te same ID), albo
      - osiągnięto max_batches (jeśli podane).
    r   NrM   rN   rP   rQ   rR   rS   rT   rU   rX   rY   rZ   r[   z$[PIPELINE] Przerwano po max_batches=.rK   u(   [PIPELINE] Nic więcej do przetworzenia.uQ   [PIPELINE] Brak postępu (powtarzają się te same ID). Zatrzymuję. remaining≈z[PIPELINE] Batch #r   r#   u    rekordów...r   pipeline)modebatchesrL   ra   )r   setr   rd   re   rf   rg   rh   ri   r   r1   r   r   rb   rF   issubsetr'   rc   rE   rj   )rq   r   r   rG   rp   r   rH   total_ok	total_errrv   seen_idsrl   rm   rn   r4   rk   r3   idsr7   r8   s                       rD   run_status_pipeliner}      sN    " 
	  95#%%JJ--!q .  G $//Q+#'377Por#s 0  G )),777 ))++D*w+/EKK"F{mST UV,$' +%	 2mD1(;;KK JK%*+UqttU+<<)NN%vwz{~w  wA  $B  C02c%j\WX 4UD8al mmCBS 	1C7 : --/!![ &%^ #w8W`ai 
	
 & 8+ <
 , n "[ &%%% 
	s   H1H%G3H%#HG6#H7G98HG<H,G?-AH:H; HH.A/HH
0HHHH%H
H%)
H13H%6H9H<H?HHHHH%H"	HH"	H%%H.*H1)NT)   T)asyncioceleryr   urllib.parser   asgiref.syncr   playwright.async_apir   extractly.modelsr   #link_agregator.check_active.flaggedr   %link_agregator.check_active.propagater	   link_agregator.utils.cookiesr
   link_agregator.utils.loggerr   $link_agregator.check_active.inactiver   r   #link_agregator.check_active.contextr   rh   rE   intboolstrro   r}        rD   <module>r      s     ! & 1 ' A E 7 . H @ 9=$ @H ,2 ",2:,2,2
 t,2 ,2 Dj,2` Eb ""EbEbEb
 tEb tEb Eb DjEbr   