
    wiW	                     F    d dl mZ ddlmZ dededefdZ G d d	e      Zy
)   )FieldHandler   )resolve_missing_textskeep_newlinesreturnc                 2   |rm| j                         D cg c]!  }dj                  |j                               # }}dj                  |D cg c]  }|j                         dk7  s| c}      S dj                  | j                               S c c}w c c}w )N 
 )
splitlinesjoinsplitstrip)r   r   linelinesls        ;/var/www/extractly/manual_agregator/parser/handlers/text.py_normalize_wsr      sv    56\\^E^T#((4::<(^EyyU>Uaggi2o!U>??88AGGI F>s   &BB%Bc                       e Zd Zd Zy)TextHandlerc           	      L   d|v rR|d   }|t        |      S t        |j                  dd            }t        |      }	t	        |	|      j                         S |j                  d      }
|dk(  rt        d|
 d       t        dt        |j                                d	       |
rc|j                  |
      }t        d
t        |       d       t        |d d       D ])  \  }}t        d| d|j                  d      d d         + |
r|j                  |
      nd }|rmt        |j                  dd            }|rt	        |j                  dd      d      }nt	        |j                  d      d      }|dk(  rt        d| d       |S |dk(  rt        d       t        |      S )NrawOverride
paragraphsFselectorpremises_locationz%
[DEBUG premises_location] selector=''z#[DEBUG premises_location] soup has z charsz [DEBUG premises_location] found z matches with selector   z  Match z: T)r   d   r   )	separatorr   z#[DEBUG premises_location] RESULT: 'zD[DEBUG premises_location] Element NOT FOUND, returning missing value)r   boolgetstrr   r   printlenget_textselect	enumerate
select_one)self
field_nameconfigsoupmain_values	selectors	extractedrawkeep_paragraphstxtselall_matchesimatchelresults                   r   parsezTextHandler.parse   s   F"'C{+F33"6::lE#BCOc(C o6<<>>jj$ ,,:3%qAB7DMMO8L7MVTU"kk#.8[9I8JJ`ab )+bq/ :HAuHQCr%..t.*DTc*J)KLM !; &)T__S!d"6::lE#BCO&r{{T{'NPTU&r{{{'>F 00;F81EFM ,,XZ#F++    N)__name__
__module____qualname__r:    r;   r   r   r      s    (,r;   r   N)baser   utilsr   r#   r!   r   r   r?   r;   r   <module>rB      s1     (S  # ),, ),r;   