# manual_agregator/parser/handlers/text.py
import logging

from .base import FieldHandler
from ..utils import resolve_missing_text

def _normalize_ws(s: str, keep_newlines: bool) -> str:
    # Collapse whitespace. If keep_newlines, collapse inside lines and keep '\n' between paragraphs.
    if keep_newlines:
        lines = [ " ".join(line.split()) for line in s.splitlines() ]
        return "\n".join([l for l in lines if l.strip() != ""])
    return " ".join(s.split())

class TextHandler(FieldHandler):
    """Field handler that produces a whitespace-normalized text value."""

    def parse(self, field_name, config, soup, main_values, selectors, extracted):
        """Return the normalized text for *field_name*.

        Resolution order:

        1. If ``config`` contains ``"rawOverride"``, that value is used
           (``None`` means "missing").
        2. Otherwise the first element matching ``config["selector"]`` in
           *soup* is used.
        3. If neither yields a value, ``resolve_missing_text(config)`` is
           returned.

        Args:
            field_name: Name of the field being parsed; used only for
                debug logging here.
            config: Field configuration mapping. Keys read by this handler:
                ``"rawOverride"``, ``"selector"`` and ``"paragraphs"``.
            soup: Object exposing ``select_one(selector)``; presumably a
                BeautifulSoup document or tag -- confirm against callers.
            main_values: Not used by this handler.
            selectors: Not used by this handler.
            extracted: Not used by this handler.

        Returns:
            The normalized text, or whatever ``resolve_missing_text(config)``
            returns when no value could be found.
        """
        log = logging.getLogger(__name__)
        # "paragraphs" switches between one flattened line and one line
        # per paragraph; it applies to both the override and the DOM path.
        keep_paragraphs = bool(config.get("paragraphs", False))

        # Prefer the raw value supplied through "rawOverride" (per the
        # original comment this comes from labelPair) over the DOM lookup.
        if "rawOverride" in config:
            raw = config["rawOverride"]
            if raw is None:
                return resolve_missing_text(config)
            return _normalize_ws(str(raw), keep_paragraphs).strip()

        # Fallback: select from the DOM.
        sel = config.get("selector")
        el = soup.select_one(sel) if sel else None
        if not el:
            log.debug(
                "text field %r: no element for selector %r; using missing value",
                field_name,
                sel,
            )
            return resolve_missing_text(config)

        if keep_paragraphs:
            # A newline separator keeps the text nodes on separate lines so
            # _normalize_ws can preserve them as paragraphs.
            raw_text = el.get_text(separator="\n", strip=True)
        else:
            # NOTE(review): no separator is passed here, so adjacent text
            # nodes appear to be joined without a space -- confirm this is
            # the intended behavior.
            raw_text = el.get_text(strip=True)
        result = _normalize_ws(raw_text, keep_paragraphs)

        log.debug("text field %r: selector %r -> %r", field_name, sel, result)
        return result
