
    .$h(                         d dl mZ d dlmZmZ d dlmZ d dlmZ d dl	Z	d dl
Z
d dlmZ dedefd	Zd
efdZded
efdZded
efdZded
efdZdedefdZ G d de      Zy)    )BaseCommand)QCount)now)PathN)NetworkMonitoredPagegrouplimitc                    t         j                  j                         }t        d      t        d      z  t        d      z  }t        d      t        d      z  t        d	      z  }||z  }t         j                  j	                  |      j                         }t         j                  j	                  |      j                         }t         j                  j	                  |      j                         }t         j                  j	                  |d
      j                         }	t         j                  j	                  |d
      j                         }
t         j                  j	                  |d
      j                         }d }| rt         j                  j                  d      j                  t        d      t        d|      t        d|      t        d|      t        d|t        d
      z        t        d|t        d
      z        t        d|t        d
      z              j                  ddd      }i }|D ]-  }|d   xs d}|d   |d   |d   |d   |d   |d   |d   d||<   / t        t        |j                               d t        d|             }nd }t               j                         |||||	|
|d}|
||d<   ||d<   |S )NT)html__isnullz^\s*$)html__regexerror)html__exact)sliced_html__isnull)sliced_html__regex)sliced_html__exactF)	is_activenameid)filter)total
empty_htmlempty_sliced_html
empty_bothinactive_empty_htmlinactive_empty_sliced_htmlinactive_empty_bothz-empty_htmlz-empty_sliced_htmlz-totalz(null)r   r   r   r   r   r   r      )tstotal_pagesr   r   r   r   r   r   by_name_by_name_top)r   objectscountr   r   valuesannotater   order_bydictlistitemsmaxr   	isoformat)r	   r
   r   empty_html_qempty_sliced_qempty_both_qr   r   r   r   r   r   by_name_toprowsr!   rkeysnaps                     ?/var/www/extractly/extractly/management/commands/html_health.py_count_snapshotr6      s    ((..0E $'!*AAARYDZZL4013QQTUipTqqN.0L &--44\BHHJJ,44;;NKQQS%--44\BHHJJ /66==lV[=\bbd!5!=!=!D!D^_d!D!e!k!k!m.66==lV[=\bbdK ((//7XDk l;"'^"D l; %*$|aRWFX7X$Y+0nq[`Oa>a+b$)$|aRWFX7X$Y  
 Xm%98D 	 AF)'xC7o%&':%;o'()>'?./0L.M'()>'?GCL  403q%=AB eoo(/( (;'A':D !Y*^K    r4   c                    t        d       t        d       t        d| d           t        d| d           t        d| d    d	| j                  d
d       d       t        d| d    d	| j                  dd       d       t        d| d    d	| j                  dd       d       d| v r| d   rt        d       t        d       | d   j                         D ]m  \  }}t        d|dd|d   dd|d   dd|j                  d
d      dd|d   dd|j                  dd      dd|d   dd|j                  dd      dd       o y y y )N u    HTML health — current snapshotzTime:               r   zTotal pages:        r    zEmpty html:         r   z   (inactive: r   r   )zEmpty sliced_html:  r   r   zEmpty BOTH:         r   r   r"   zBy name (top):- 30  total=r   6d  empty_html= (inact:z)  empty_sliced=z)  both=)printgetr*   )r4   r   valss      r5   _print_snapshotrD   R   s   	"I	
,-	 d
-.	 m!4 5
67	 l!3 4N488LacdCeBffg
hi	 &9!: ;>$((SoqrJsIttu
vw	 l!3 4N488LacdCeBffg
hi$~"6b	~.446JD$T"IXd7mB%7 8"<04HTXXF[\]=^_a<b c  $%8 9"=XdhhOklmFnoqEr s\*2.htxx@UVW7XY[6\\]_ 7 #7r7   pathc                    g }|j                  d|d           |j                  d|d           |j                  d|d    d|j                  dd	       d
       |j                  d|d    d|j                  dd	       d
       |j                  d|d    d|j                  dd	       d
       |j                  d      r|j                  d       |d   j                         D ]k  \  }}|j                  d| d|d    d|d    d|j                  dd	       d|d    d|j                  dd	       d|d    d|j                  dd	       d
       m |j                  d       | j                  j	                  dd       | j                  dd      5 }|j                  d j                  |      d z          d d d        y # 1 sw Y   y xY w)!Nz	SNAPSHOT r   ztotal_pages:       r    zempty_html:        r   z (inactive: r   r   r:   zempty_sliced_html: r   r   zempty_both:        r   r   r!   zby_name:z  - z: total=r   z, empty_html=r@   z), empty_sliced_html=z), both=z---Tparentsexist_okautf-8encoding
)appendrB   r*   parentmkdiropenwritejoin)rE   r4   linesr   rC   fs         r5   _append_txtrW   i   s   E	LL9T$ZL)*	LL&tM':&;<=	LL&tL'9&:,txxPeghGiFjjklm	LL&t,?'@&AdhhWsuvNwMxxyz{	LL&tL'9&:,txxPeghGiFjjklmxx	Z y///1JD$LLtfHT']O 4"<01$((CXYZ:[9\ ]%%)*=%>$?xQmnoHpGq r\*+8DHH=RST4U3VVWY 2 
LLKKdT2	3	)Q			% 4'( 
*	)	)s   $GGc                 >   | j                   j                  dd       |j                         D ci c]  \  }}|dk7  s|| }}}| j                  dd      5 }|j	                  t        j                  |d      d	z          d d d        y c c}}w # 1 sw Y   y xY w)
NTrG   r"   rJ   rK   rL   F)ensure_asciirN   )rP   rQ   r*   rR   rS   jsondumps)rE   r4   kvrV   s        r5   _append_jsonlr^      s    KKdT2!ZZ\A\TQQ.-@AqD\DA	3	)Q	

4e4t;< 
*	) B	)	)s   BB*BBc                    | j                   j                  dd       |j                         D ci c]  \  }}|dk7  s|| }}}g }| j                         r<	 t	        j
                  | j                  d      xs d      }t        |t              sg }|j                  |       | j                  t	        j                  |dd	      d       y c c}}w # t        $ r g }Y Nw xY w)
NTrG   r"   rK   rL   []F   )rY   indent)rP   rQ   r*   existsrZ   loads	read_text
isinstancer)   	ExceptionrO   
write_textr[   )rE   r4   r\   r]   arrs        r5   _append_json_arrayrj      s    KKdT2!ZZ\A\TQQ.-@AqD\DA
C{{}	**T^^W^=EFCc4( JJtOODJJsqAGOT B  	C	s   CC;C C$#C$lastc                    |dk  ry | j                         st        d|  d       y | j                  j                         }t        d| d|  d       	 |dv r| j	                  d	      j                         }|| d  }|D ]}  }	 t        j                  |      }t        d
|j                  d       d|j                  d       d|j                  d       d|j                  d       d|j                  d       
        y |dk(  rt        j                  | j	                  d	      xs d      }|| d  D ]g  }t        d
|j                  d       d|j                  d       d|j                  d       d|j                  d       d|j                  d       
       i y |dk(  r| j	                  d	      }t        j                  d|t        j                        }	|	D 
cg c]  }
|
j                         s|
 }}
|| d  D ]1  }
|
j                         d   j                         }t        d
|        3 y t        d       y # t        $ r Y w xY wc c}
w # t        $ r}t        d| d       Y d }~y d }~ww xY w)Nr   z
(no history file at r:   z
Last z snapshots from :z.jsonlr9   rK   rL   r;   r   r=   r    r?   r   z  empty_sliced=r   z  both=r   .jsonr`   .txtz
^SNAPSHOT )flagsz3(unknown extension; supported: .jsonl, .json, .txt)z(failed to read history: )rc   rA   suffixlowerre   
splitlinesrZ   rd   rB   rg   resplit	MULTILINEstrip)rE   rk   extrU   tailliner4   ri   textblocksbsnaps
first_linees                 r5   _show_last_from_filer      s   qy;;=&tfA./
++


C	GD6)$q
12"0. NNGN4??AE$=D	::d+DTXXd^,HTXXm5L4M N&&*hh|&<%= >((,1D(E'F G  $ 679  G^**T^^W^=EFCTEF$(-1H0I J""&((<"8!9 :$$(HH-@$A#B C HH\235 $ F]>>7>3DXXmTFF &41!'')QE4D56]\\^A.446
:,'( # GH' !  5  0)!A.//0sc   /I A:I>I BI  AI !I7I;<I 8I 	II II 	I;"I66I;c                       e Zd ZdZd Zd Zy)CommandzlCounts empty HTML fields in NetworkMonitoredPage and writes a snapshot to a TXT or JSON file (with history).c                     |j                  dddd       |j                  ddd	       |j                  d
t        dd       |j                  dt        dd       |j                  ddd	       y )Nz--outputz-ozhtml_health.jsonlzLOutput file path. Extension decides format: .jsonl (default), .json, or .txt)defaulthelpz--group
store_truez6Include per-name breakdown in snapshot (can be large).)actionr   z--limit   z3How many 'name' groups to print in console (top-N).)typer   r   z--lastr   z=After writing, display last N snapshots from the output file.z
--no-printz@Do not print current snapshot to console (still writes to file).)add_argumentint)selfparsers     r5   add_argumentszCommand.add_arguments   s    '_	 	 	
 	I 	 	

 	F	 	 	
 	P	 	 	
 	S 	 	
r7   c                    t        |d         }t        |j                  d            }t        |j                  d      xs d      }t	        ||      }|j                  d      st        |       |j                  j                         }|dv rEt        ||       | j                  j                  | j                  j                  d| d	             n|d
k(  rEt        ||       | j                  j                  | j                  j                  d| d             n|dk(  rEt        ||       | j                  j                  | j                  j                  d| d             nFt        ||       | j                  j                  | j                  j                  d| d|              t        |j                  d      xs d      }|dkD  rt!        ||       y y )Noutputr	   r
   r   )r	   r
   no_printrn   z

Saved to z (JSONL append)ro   z (JSON array)rp   z (TXT append)z
Unknown extension 'z', saved as JSONL to rk   r   )r   boolrB   r   r6   rD   rr   rs   r^   stdoutrS   styleSUCCESSrj   rW   WARNINGr   )	r   argsoptsout_pathr	   r
   r4   ry   last_ns	            r5   handlezCommand.handle   s   X'TXXg&'DHHW%+,U%8xx
#D!oo##%. (D)KKdjj00;xj1XYZG^x.KKdjj00;xj1VWXF]$'KKdjj00;xj1VWX(D)KK

""%:3%?TU]T^#_` TXXf%*+A: 62 r7   N)__name__
__module____qualname__r   r   r    r7   r5   r   r      s    yD
<3r7   r   )django.core.management.baser   django.db.modelsr   r   django.utils.timezoner   pathlibr   rZ   ru   extractly.modelsr   r   r   r6   r(   rD   rW   r^   rj   r   r   r   r7   r5   <module>r      s    3 % %   	 1D4 D DN$ .)d )$ ).= =D =UT U U+0t +03 +0\=3k =3r7   