
    .$hN                        d dl mZ d dlmZmZ d dlmZ d dlmZm	Z	 d dl
mZ d dlmZ d dlZd dlZd dlmZ d	ed
efdZded
efdZded
eeef   fdZded
eeef   fdZd&dee   ded
efdZdedededee   dedz  f
dZdee   d
efdZded
eeef   fdZd'dee   dedz  d
ee   fdZ d'dee   dedz  d
efdZ!defdZ"dedefdZ#dedefd Z$dedefd!Z%ded"efd#Z& G d$ d%e      Z'y)(    )BaseCommand)QCount)now)FieldDoesNotExist
FieldError)models)PathN)NetworkMonitoredPage
field_namereturnc                     	 | j                   j                  |      }t        |t        j
                  t        j                  f      S # t        $ r Y yw xY w)uD   Sprawdza czy pole (liść, bez '__') jest Char/Text na danym modelu.F)_meta	get_fieldr   
isinstancer	   	CharField	TextField)modelr   fs      A/var/www/extractly/extractly/management/commands/parser_health.py_is_text_fieldr      sO    KK!!*- a&**F,<,<=>>  s   A 	AA
field_pathc                     d| v r!t        di |  ddit        di |  ddiz  S t        t        |       r!t        di |  ddit        di |  ddiz  S t        di |  ddiS )u   
    Q wykrywające 'pustą' wartość w polu/ścieżce:
      - zawsze IS NULL
      - dla pól tekstowych także whitespace-only przez __regex
    ____isnullT__regexz^\s*$)r    )r   r   r   )r   s    r   _empty_qr      s     z3zl(+T23a6]j\;QS[:\6]]]*zB3zl(+T23a6]j\;QS[:\6]]]/:,h'.//    c                     t        |       }	 t        j                  j                  |      j	                         dfS # t
        $ r7 t        j                  j                  di |  ddij	                         dfcY S w xY w)z#Zwraca (count, null_only_fallback).Fr   Tr   r   r   objectsfiltercountr   r   qs     r   _count_total_empty_for_fieldr'   $   s}    Ad#++2215;;=uDD d#++22UH6Mt5TU[[]_cccds   .< =A<;A<c                 
   t        |       }	 t        j                  j                  |d      j	                         dfS # t
        $ r; t        j                  j                  di |  ddiddij	                         dfcY S w xY w)zJZwraca (count_inactive, null_only_fallback) dla pustych z is_active=False.F	is_activer   Tr*   r   r!   r%   s     r   %_count_total_empty_inactive_for_fieldr+   -   s    Au#++2212FLLNPUUU u#++22fH6Mt5Tf`efllnptttus   0> ABBfieldsuse_null_onlyc                    i }| D ]w  }d|j                  dd      z   }d|j                  dd      z   }|rt        di | ddi}nt        |      }t        d|      ||<   t        d|t        d	
      z        ||<   y |r%t               }| D ]  }|t        di | ddiz  } nt               }| D ]  }|t        |      z  } t        d|      |d<   t        d|t        d	
      z        |d<   |S )u   
    Adnotacje do per-name; dodaje:
      - empty__{fp} dla każdego pola
      - inactive__{fp} dla pustych + is_active=False
      - empty_any = OR(po wszystkich _empty_q)
      - inactive_empty_any = OR(po wszystkich _empty_q) + is_active=False
    empty__r   _
inactive__r   Tid)r#   Fr)   	empty_anyinactive_empty_anyr   )replacer   r   r   )r,   r-   annfpempty_aliasinactive_aliasq_emptyany_qs           r   _build_annotations_for_fieldsr<   8   s    C"**T3"77%

4(==2bT?D12GrlG g6K#D1u;M1MNN  BQ1RD/4011E  BXb\!E  T%0C %d51u;M3M NCJr   grouplimitfieldselected_namec                    t         j                  j                         }|xs |gD cg c]&  }|s|j                         s|j                         ( }}i }i }g }	g }
|D ]O  }t	        |      \  }}|||<   |r|	j                  |       t        |      \  }}|||<   |s?|
j                  |       Q ||d      }||d      }d }d }d }| r$	 t        |d      }t        j                  j                  d      j                  ddt        d      i|j                  dd      }i }|D ]  }|d   xs d
}|d   |j                  dd      |j                  dd      i i d}|D ]\  }d|j                  dd      z   }d|j                  dd      z   }|j                  |d      |d   |<   |j                  |d      |d   |<   ^ |||<    t        t!        |j#                               d t%        d|             }|||v r
d|i||   }nt         j                  j'                  |      }|j                         }i i }}|D ]]  }t)        ||      \  }} |||<   | r||	vr|	j                  |       	 t+        |      }!|j'                  |!d      j                         ||<   _ 	 t-        |      }"|j'                  |"      j                         }#|j'                  |"d      j                         }$|||#|$||d}t1               j3                         ||d   ||||||	|
d
}%|
||%d<   ||%d<   |||%d<   |%S c c}w # t        $ rZ t        |d	      }t        j                  j                  d      j                  ddt        d      i|j                  dd      }Y ow xY w# t        $ r0  |j&                  di | dd	iddij                         ||<   Y w xY w# t        $ rh t/               }"|D ]  }|"t/        di | dd	iz  }" |j'                  |"      j                         }#|j'                  |"d      j                         }$Y [w xY w)Nr   F)r-   nametotalr2   z
-empty_anyz-totalT(null)r3   r4   )rC   r3   r4   empty_by_fieldinactive_by_fieldr/   r   r0   r1   rE   rF      rB   r)   r   r*   )rB   rC   r3   r4   rE   rF   )
tsr,   primary_fieldtotal_pagesempty_totalempty_total_by_fieldinactive_total_primaryinactive_total_by_fieldnull_only_fallback_fields"null_only_fallback_fields_inactiveby_name_by_name_topr@   r   )r   r"   r$   stripr'   appendr+   r<   valuesannotater   order_byr   getr5   dictlistitemsmaxr#   "_count_total_empty_for_field_for_qr   _or_q_for_fieldsr   r   	isoformat)&r=   r>   r?   r,   r@   rC   r   rM   rO   rP   rQ   r7   cnt	null_only	cnt_inactnull_only_inactrL   rN   by_name_toprR   selectedannotationsrowsrkeyentrye_aliasi_aliasbase	total_sel	empty_mapinactive_mapnlfr&   r;   empty_any_cntinactive_any_cntsnaps&                                         r   _count_snapshotrv   ]   s    ((..0E"("3UG"3H"3Aaggiaggi"3FH+-.0+-46&5b9Y#&R %,,R0%J2%N"	?&/#.55b9  'vay1K4VAY?KGH	7eTK$,,33F;< %d</:<,1  AF)'xC7UU;2&'ee,@!&D"$%'E #bjjs&;;&D#)>>./eeGQ.?&'+12w1B)*2.	 
 !GCL   403q%=AB$'"MLW]5KL+33:::N JJL	*,b<	 BA$KHC$'IbMr)BB188<k$RL+/;;qE;+J+P+P+RR( !	S,V4E$(KK$6$<$<$>M'+{{5E{'J'P'P'R$ *&!.*:&/)5 eoo" 4 #9#:%>.PD  !Y*^ (_Kw IB  	7dSK$,,33F;< %d</:<,1 	X & k+64;;+bRD/49P+b\a+b+h+h+jR(k " SCE$!=Hot%<!== %$(KK$6$<$<$>M'+{{5E{'J'P'P'R$SsJ   L)L)L)AL. (/NAO .ANN5OOA-Q Qc                 D    t               }| D ]  }|t        |      z  } |S N)r   r   )r,   r&   r7   s      r   r_   r_      s&    	A	Xb\ Hr   c                     t        |      }	 | j                  |      j                         dfS # t        $ r*  | j                  di | ddij                         dfcY S w xY w)NFr   Tr   )r   r#   r$   r   )qsr   r&   s      r   r^   r^      sm    AJyy|!!#U** Jryy;zl(3T:;AACTIIJs    . 0A! A!c           	         t         j                  j                         }|r|j                  |      }	 t	        |j                  t        |             j                  dd            }|S # t        $ rS t               }| D ]  }|t        di | ddiz  } t	        |j                  |      j                  dd            }Y |S w xY w)uT   Zwraca listę ID pustych w *którymkolwiek* z pól (opcjonalnie zawężone do name).rH   r2   T)flatr   r   )	r   r"   allr#   r[   r_   values_listr   r   )r,   r@   rz   idsr;   r7   s         r   _collect_empty_idsr      s    		%	%	)	)	+BYYMY*B299-f56BB4dBST J  BBQ1RD/4011E 299U#//4/@AJBs   4A* *ACCc           	         t         j                  j                         }|r|j                  |      }	 |j                  t	        |             j                  dd      }i }|D ]A  \  }}|xs d}	|j                  |	dg d      }
|
d	   j                  |       |
d
xx   dz  cc<   C |S # t        $ rH t               }| D ]  }|t        di | ddiz  } |j                  |      j                  dd      }Y w xY w)u   
    Zwraca słownik: name -> {"count": int, "ids": [int, ...]}.
    Jeśli selected_name podane, zwraca tylko tę jedną grupę.
    rH   r2   rB   r   TrD   r   )r$   r   r   r$   rG   r   )
r   r"   r}   r#   r_   r~   r   r   
setdefaultrU   )r,   r@   rz   pairsr;   r7   grouped_idnmrj   buckets              r   _collect_empty_ids_groupedr      s   
 
	%	%	)	)	+BYYMY*;		*623??fM  "GRnH##C1R)@AuS!w1	 
 N  ;BQ1RD/4011E 		% ,,T6:	;s   *B( (AC98C9ru   c                    t        d       t        d| d           t        ddj                  | d                 t        d| d           t        d	| d
           t        d| d    d| j                  dd       d       | j                  d      r|t        d       | d   D ]i  }| d   j                  |d      }| j                  di       j                  |d      }|| j                  dg       v rdnd}t        d|dd| d| d|        k d| v r| d   rt        d       | d   j                         D ]  \  }}t        d|dd|d   d d!|d"   d d#|j                  d$d      d d	       |j                  d%i       }|j                  d&i       }| d   D ]:  }t        d'|d(d)|j                  |d      d d*|j                  |d      d d       <  y y y )+Nu#   
Parser health — current snapshotzTime:            rI   zFields:          , r,   zPrimary field:   rJ   zTotal pages:     rK   zEmpty (primary): rL   z   (inactive: rN   r   )rM   z
By field (totals):rO   rP   z  (NULL-only) - 40z empty= (inactive: rS   z
By name (top):30z total=rC   6dz  empty_any=r3    (inactive_any: r4   rE   rF   u       · 36 z  (inactive: )printjoinrY   r\   )	ru   r7   ra   inactmarkerrB   valsebfibfs	            r   _print_snapshotr     sC   	
01	d4j\
*+	diiX78
9:	d?34
56	d=12
34	d=12.JbdeAf@ggh
ijxx&'$%x.B-.222q9CHH6;??AFE(*dhh7RTV.W(W_]_FBr"gWSEeWAfXFG	 ! $~"6 !~.446JD$BtBiwtG}R&8T+EVWYDZZjkoksks  uI  JK  lL  MO  kP  PQ  R  S((+R0C((.3C8n2waAr':-PRTUWYGZZ[\] %	 7 #7r   pathc                    g }|j                  d|d           |j                  ddj                  |d                 |j                  d|d           |j                  d|d	           |j                  d
|d    d|j                  dd       d       |j                  d      r|j                  d       |d   D ]n  }|d   j                  |d      }|j                  di       j                  |d      }||j                  dg       v rdnd}|j                  d| d| d| d|        p |j                  d      r|j                  d       |d   j                         D ]  \  }}|j                  d| d|d    d|d    d|j                  dd       d	       |d   D ]]  }|j                  d | d|j                  d!i       j                  |d       d|j                  d"i       j                  |d       d       _  |j                  d#       | j                  j                  d$d$%       | j                  d&d'(      j                  d)j                  |      d)z          y )*Nz	SNAPSHOT rI   zfields:             r   r,   zprimary_field:      rJ   ztotal_pages:        rK   zempty_primary:      rL   r   rN   r   r   rM   zempty_total_by_field:rO   rP   z (NULL-only)r   z  - z: rR   zby_name:z: total=rC   z, empty_any=r3   r   r4   u	         · rE   rF   z---Tparentsexist_okautf-8encoding
)rU   r   rY   r\   parentmkdiropenwrite)	r   ru   linesr7   ra   r   r   rB   r   s	            r   _append_txtr   6  s   E	LL9T$ZL)*	LL'		$x.(A'BCD	LL'_(='>?@	LL'](;'<=>	LL'](;'<LRjklImHnnopqxx&',-x.B-.222q9CHH6;??AFE')TXX6QSU-V'V^\^FLL4t2cU,ugQvhGH	 !
 xx	Z y///1JD$LL4vXd7m_LkIZH[[klpltlt  vJ  KL  mM  lN  NO  P  Q8nyBtxx8H/L/P/PQSTU/V.WWcdhdldl  nA  BD  eE  eI  eI  JL  MN  eO  dP  PQ  R  S % 2 
LLKKdT2IIcGI$**499U+;d+BCr   c                    | j                   j                  dd       |j                         D ci c]  \  }}|dk7  s|| }}}| j                  dd      j	                  t        j                  |d      d	z          y c c}}w )
NTr   rS   r   r   r   Fensure_asciir   )r   r   r\   r   r   jsondumps)r   ru   kvs       r   _append_jsonlr   P  st    KKdT2!ZZ\A\TQQ.-@AqD\DAIIcGI$**4::d+ORV+VW Bs
   BBc                    | j                   j                  dd       |j                         D ci c]  \  }}|dk7  s|| }}}g }| j                         r<	 t	        j
                  | j                  d      xs d      }t        |t              sg }|j                  |       | j                  t	        j                  |dd	      d       y c c}}w # t        $ r g }Y Nw xY w)
NTr   rS   r   r   []F   )r   indent)r   r   r\   existsr   loads	read_textr   r[   	ExceptionrU   
write_textr   )r   ru   r   r   arrs        r   _append_json_arrayr   V  s    KKdT2!ZZ\A\TQQ.-@AqD\DA
C{{}	**T^^W^=EFCc4( JJtOODJJsqAGOT B  	C	s   CC;C C$#C$lastc                    |dk  ry | j                         st        d|  d       y | j                  j                         }t        d| d|  d       	 |dv r| j	                  d	      j                         }|| d  D ]{  }	 t        j                  |      }t        d
|j                  d       ddj                  |j                  dg              d|j                  d       d|j                  d              } y |dk(  rt        j                  | j	                  d	      xs d      }|| d  D ]e  }t        d
|j                  d       ddj                  |j                  dg              d|j                  d       d|j                  d              g y |dk(  r| j	                  d	      }t        j                  d|t        j                        }|D 	cg c]  }	|	j                         s|	 }
}	|
| d  D ]1  }	|	j                         d   j                         }t        d
|        3 y t        d       y # t        $ r Y w xY wc c}	w # t        $ r}t        d| d       Y d }~y d }~ww xY w)Nr   z
(no history file at r   z
Last z snapshots from :z.jsonlr   r   r   r   rI   z	  fields=,r,   z  total=rK   z  empty_primary=rL   .jsonr   .txtz
^SNAPSHOT )flagsz3(unknown extension; supported: .jsonl, .json, .txt)z(failed to read history: )r   r   suffixlowerr   
splitlinesr   r   rY   r   r   resplit	MULTILINErT   )r   r   extr   lineru   r   textblocksbsnaps
first_linees                r   _show_last_from_filer   e  sg   qy;;=&tfA./
++


C	GD6)$q
120. NNGN4??AEtef::d+DTXXd^,IchhtxxRT?U6V5W X!!%-!8 99I$((S`JaIbd & G^**T^^W^=EFCTEF$(	#((488Hb;Q2R1S T!XXm455Edhh}F]E^` $
 F]>>7>3DXXmTFF &41!'')QE4D56]\\^A.446
:,'( # GH# !  5  0)!A.//0sc   -I A8H>:I =BI AI I1I5<I 2I >	II 
II 	I5I00I5c                       e Zd ZdZd Zd Zy)Commanda*  Counts empties for given field(s) in NetworkMonitoredPage, grouped by 'name', and writes a snapshot to a TXT/JSON/JSONL file (with history). Use --print to output JSON list of IDs that are empty in ANY of the selected fields. Use --print-by-name to output JSON grouped by 'name' with IDs per group.c                    |j                  dddd       |j                  ddd	       |j                  d
t        dd       |j                  dt        dd       |j                  ddd	       |j                  ddd       |j                  dd       |j                  dd       |j                  dddd       |j                  dd dd!       y )"Nz--outputz-ozparser_health.jsonlz0Output file path (.jsonl default), or .json/.txt)defaulthelpz--group
store_truez6Include per-name breakdown in snapshot (console/file).)actionr   z--limit   z3How many 'name' groups to print in console (top-N).)typer   r   z--lastr   z=After writing, display last N snapshots from the output file.z
--no-printz)Do not print current snapshot to console.z--fieldnetwork_ad_manualz0Primary field (used when --fields not provided).z--fieldszWComma-separated fields (e.g. 'network_ad_manual__html,network_ad_manual__sliced_html').)r   z--namezIOptional exact name filter for print modes and snapshot selected section.z--print	print_idsz;Print JSON with IDs empty in ANY of the fields (flat list).)destr   r   z--print-by-nameprint_groupedz0Print JSON grouped by 'name' with IDs per group.)add_argumentint)selfparsers     r   add_argumentszCommand.add_arguments  s   J6K!S 	 	UIl!Y 	 	[IC!V 	 	XH3!` 	 	bL!L 	 	NI/B!S 	 	UJ!z 	 	|H!l 	 	nIK!^ 	 	`-OL!S 	 	Ur   c                 N   t        |d         }t        |j                  d            }t        |j                  d      xs d      }t	        |j                  d      xs d      }|j                  d      }|r-|j                  d      D cg c]  }|j                          c}ng }	|j                  d	      }
|	xs |gD cg c]  }|s|	 }	}|j                  d
      rg|j                  d      sVt        |	|
      }t               j                         |	|
dt        |      |d}t        t        j                  |d             y |j                  d      rt        |	|
      }t        d |j!                         D              }t#        |j%                         d       D cg c]  }|||   d   ||   d   d }}t               j                         |	|
d||d}t        t        j                  |d             y t'        ||||	|
      }|j                  d      st)        |       |j*                  j-                         }|dv rEt/        ||       | j0                  j3                  | j4                  j7                  d| d             n|dk(  rEt9        ||       | j0                  j3                  | j4                  j7                  d| d             n|dk(  rEt;        ||       | j0                  j3                  | j4                  j7                  d| d              nFt/        ||       | j0                  j3                  | j4                  j=                  d!| d"|              t        |j                  d#      xs d$      }|d$kD  rt?        ||       y y c c}w c c}w c c}w )%Noutputr=   r>   r   r?   r   r,   r   rB   r   r   )r@   any)rI   r,   rB   matchr$   r   Fr   c              3   &   K   | ]	  }|d      yw)r$   Nr   ).0r   s     r   	<genexpr>z!Command.handle.<locals>.<genexpr>  s     K:JF7O:Js   c                     | d u | fS rx   r   )r   s    r   <lambda>z Command.handle.<locals>.<lambda>  s    !t)Qr   )rj   r$   r   )rB   r$   r   )rI   r,   name_filterr   total_countgroups)r=   r>   r?   r,   r@   no_printr   z

Saved to z (JSONL append)r   z (JSON array)r   z (TXT append)z
Unknown extension 'z', saved as JSONL to r   r   ) r
   boolrY   r   strr   rT   r   r   r`   lenr   r   r   r   sumrV   sortedkeysrv   r   r   r   r   stdoutr   styleSUCCESSr   r   WARNINGr   )r   argsoptsout_pathr=   r>   rJ   
fields_rawsr,   r@   r   r   payloadr   	total_idsrB   groups_listru   r   last_ns                        r   handlezCommand.handle  sD   X'TXXg&'DHHW%+,DHHW-D1DEXXh'
?IZ%5%5c%:;%:!'')%:;r($77>7A!7> 88K /)B$V=ICeoo' %SG $**W59: 88O$0}UGK'..:JKKI #7<<>7OPPD g(>wt}UZG[\P  
 eoo' ,(%G $**W59: '
 xx
#D!oo##%. (D)KKdjj00;xj1XYZG^x.KKdjj00;xj1VWXF]$'KKdjj00;xj1VWX(D)KKdjj003HMbckbl1mnoTXXf%*+A: 62 C <>*s   NN	N#N"N)__name__
__module____qualname__r   r   r	  r   r   r   r   r     s    	S 	U,H3r   r   )Frx   )(django.core.management.baser   django.db.modelsr   r   django.utils.timezoner   django.core.exceptionsr   r   	django.dbr	   pathlibr
   r   r   extractly.modelsr   r   r   r   r   tupler   r'   r+   r[   rZ   r<   rv   r_   r^   r   r   r   r   r   r   r   r   r   r   r   <module>r     s   3 % % @    	 1?c ?d ?
0 
0 
0dS dU395E duc ueCI>N u $s)  D  UY  J~d ~3 ~s ~DI ~^adh^h ~JT#Y 1 Js JuS$Y?O JtCy t tTWy tCy t W[ 4^$ ^8Dd D$ D4X XD XUT U U&0t &03 &0Rf3k f3r   