
    GFh^              
           d dl Z d dlmZ d dlmZ d dlmZ d dlmZ d dl	m
Z
 d dlmZ d dlmZ d d	lmZ g d
Z eddddddddd      ZdZdZdZeddedededefd       Zed        Zed        Zy)     N)shared_task)sync_to_async)async_playwright)fetch_and_save_html_for_pages)get_pages_to_process)NetworkMonitoredPage)scrape_all_sources)cache)z---disable-blink-features=AutomationControlledz--no-sandboxz--disable-dev-shm-usagezoMozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36zpl-PLzEurope/WarsawiV  i   )widthheightzpl-PL,pl;q=0.9,en;q=0.81)zAccept-LanguagezUpgrade-Insecure-Requests)
user_agentlocaletimezone_idviewportextra_http_headersa  
Object.defineProperty(Navigator.prototype, 'webdriver', { get: () => undefined });
window.chrome = window.chrome || { runtime: {} };
Object.defineProperty(navigator, 'languages', { get: () => ['pl-PL','pl'] });
Object.defineProperty(navigator, 'plugins',  { get: () => [1,2,3,4,5] });
html_aggr_locki  enableinclude_fetchedrequeue_if_lockedrequeue_delayc                 `    t        j                  t        dt              }|s-t	        d       |rt
        j                   dd|       ddiS  fd	}	 t        j                   |              dd
it        j                  t               S # t        j                  t               w xY w)Nr   )timeoutz/[html_agregator_task] already running -> lockedF)r   namesr   r   )kwargs	countdownstatuslockedc                    K   d } d }	 t               d {   } t        t              |       d {   }t        dt	        |       d       |s9	 	 |r|j                          d {    	 | r| j                          d {    y y t               4 d {   }|j                  j                  dt               d {   }  | j                  di t         d {   }|j                  t               d {    |j                          d {   }t!        ||       d {    d d d       d {    	 |r|j                          d {    	 | r| j                          d {    y y 7 a7 G7 # t        $ r Y w xY w7 # t        $ r Y y w xY w7 	7 7 7 7 7 7 t# 1 d {  7  sw Y   xY w7 p# t        $ r Y yw xY w7 f# t        $ r Y y w xY w# 	 |r|j                          d {  7   n# t        $ r Y nw xY w	 | r| j                          d {  7   w w # t        $ r Y w w xY wxY ww)N)r   namer   z#[html_agregator_task] Do pobrania: z stronT)headlessargs)pages )r   r   listprintlenclose	Exceptionr   chromiumlaunchPLAYWRIGHT_ARGSnew_contextCTX_KWadd_init_scriptSTEALTH_INIT_SCRIPTnew_pager   )	browsercontextpages_qsr#   ppager   r   r   s	         %/var/www/extractly/extractly/tasks.pyrunnerz#html_agregator_task.<locals>.runner9   s$    	1e]lmmH---h77E7E
|6JK!--/))!--/))  ())Q !

 1 14o 1 VV 3 3 3 =f ==--.ABBB$--//3DFFF *)!--/))!--/)) + n7  *  * # *V=B/F *))) *  * !--/)) !--/))  s
  I	G, E;G, E>G, F -F.F 3F F	F I	G, F&G, !%F5F)F5$F+%F5F-F5F/F5.F1/F53G, >F3?G, G G
G G 4G5G 9I	;G, >G, F 	FI	FI	F 	F# I	"F##I	&G, )F5+F5-F5/F51F53G, 5G;F><GG, 
G 	GI	GI	G 	G)&I	(G))I	,I.H
HH
	I
	HIHIH7/H20H75I7	I IIII	ok)
r
   addLOCK_KEYLOCK_TTLr&   html_agregator_taskapply_asyncasynciorundelete)r   r   r   r   r   gotr8   s   ```    r7   r=   r=   +   s     ))Hc8
4C?@++"(5_sxy' ,  (##<FH$XXs   B B-c                  >    d } t        j                   |               y)u   
    Uruchamia scraper dla wszystkich źródeł w trybie 'fetch'.
    (scrape_all_sources ma już w sobie własny kontekst/UA/locale/itd.)
    c                     K   	 t        d       t        d d dd       d {    y 7 # t        $ r} t        d|         Y d } ~ y d } ~ ww xY ww)Nz*Uruchamiam scrape_all_sources (mode=fetch)fetchT	source_idparamsmoder!   u   Błąd w link_agregator_task: r&   r	   r)   es    r7   r8   z#link_agregator_task.<locals>.runnere   sV     		8>?$	    	821#677	86   A) ') A) 	A
A AA

ANr?   r@   r8   s    r7   link_agregator_taskrP   _   s    
8 KK    c                  >    d } t        j                   |               y)u   
    Uruchamia scraper w trybie 'check', który sprawdza status aktywności źródeł.
    (scrape_all_sources ma już w sobie własny kontekst/UA/locale/itd.)
    c                     K   	 t        d       t        d d dd       d {    y 7 # t        $ r} t        d|         Y d } ~ y d } ~ ww xY ww)Nz*Uruchamiam scrape_all_sources (mode=check)checkTrF   u   Błąd w is_active_check_task: rJ   rK   s    r7   r8   z$is_active_check_task.<locals>.runnerz   sV     		9>?$	    	93A3788	9rM   NrN   rO   s    r7   is_active_check_taskrU   t   s    
9 KKrQ   )trueNFF<   )r?   celeryr   asgiref.syncr   playwright.async_apir   html_agregator.html_fetcherr   html_agregator.utils.processr   extractly.modelsr   link_agregator.source_managmentr	   django.core.cacher
   r,   dictr.   r0   r;   r<   strboolintr=   rP   rU   r$   rQ   r7   <module>rd      s      & 1 E = 1 > #
 
	= s+4%(
  / /4 /lp /  JM / /f  (  rQ   