# -*- coding: utf-8 -*-
"""
Inactive rules evaluator.

Supported NEW-style conditions inside a single predicate dict:
  - selector_exists: "css"
  - selector_missing: "css"
  - selector_text_equals: { "selector": "css", "text": "..." }
  - selector_contains: { "selector": "css", "text": "..." }
  - selector_exists_any: ["css1", "css2", ...]
  - selector_missing_all: ["css1", "css2", ...]
  - text_contains: "..." | ["...", "..."]
  - text_contains_any: ["...", "..."]
  - field_empty: "name"        (reads from `extracted`)
  - field_missing: "name"      (reads from `extracted`)
  - url_contains: "..."        (reads from page.url)

Legacy compatibility (single dict with "type"):
  - { "type": "text_contains", "text": "..." }
  - { "type": "selector_missing", "selector": "..." }
  - { "type": "selector_contains", "selector": "...", "text": "..." }
  - { "type": "selector_text", "selector": "...", "text": "..." }
  - { "type": "source_field_match", "field": "...", "match": "..." }

Top-level rule styles:
  - NEW: { "when": { "all":[...], "any":[...], "none":[...] }, "reason":"..." }
  - LEGACY single condition dict (treated like NEW with any=[...]).
"""

import json
from typing import Any, Dict, Iterable, Tuple, Optional
import unicodedata
from bs4 import BeautifulSoup


def _as_list(x):
    if x is None:
        return []
    if isinstance(x, (list, tuple)):
        return list(x)
    return [x]


def _normalize_ascii(s: str) -> str:
    """Lowercase, strip diacritics, collapse whitespace to single spaces."""
    s = (s or "").lower()
    # Normalize and strip accents/diacritics
    s = unicodedata.normalize("NFKD", s)
    s = "".join(ch for ch in s if not unicodedata.combining(ch))
    # Collapse whitespace
    return " ".join(s.split())


def _text_contains(haystack: Optional[str], needles: Iterable[str]) -> bool:
    """Return True if any needle occurs in *haystack*.

    Matching is attempted twice per needle: first case-insensitively on the
    raw text, then case- and diacritics-insensitively (so ASCII needles can
    match accented page text).

    Args:
        haystack: Text to search; None/empty yields False.
        needles: Candidate substrings; falsy entries are skipped.
    """
    if not haystack:
        return False
    low = str(haystack).lower()
    low_norm = _normalize_ascii(low)
    for n in needles:
        n_low = (n or "").lower()
        if not n_low:
            continue
        # direct case-insensitive match
        if n_low in low:
            return True
        # ascii/diacritics-insensitive match (allows using ASCII needles)
        n_norm = _normalize_ascii(n_low)
        # Guard: a needle made only of whitespace or combining marks normalizes
        # to "", and `"" in low_norm` is always True — that would make such a
        # needle match every haystack. Skip empty normalized needles instead.
        if n_norm and n_norm in low_norm:
            return True
    return False


def _cond_ok(
    cond: Dict[str, Any],
    soup: BeautifulSoup,
    html: str,
    page=None,
    extracted: Optional[dict] = None,
) -> bool:
    """Evaluate a single condition dict.

    Dispatch is first-match-wins over the NEW-style keys below, in declaration
    order; if none of them is present, we fall back to the legacy
    ``{"type": ...}`` dispatch. An unrecognized condition evaluates to False
    (fail-closed: an unknown rule never marks a page inactive).

    Args:
        cond: One predicate dict (see module docstring for supported shapes).
        soup: Parsed document used for CSS-selector lookups.
        html: Raw page text used for substring checks.
        page: Optional page-like object; only its ``url`` attribute and
            legacy ``source_field_match`` fields are read here.
        extracted: Optional dict of already-extracted field values.

    Returns:
        True when the condition holds for this page.
    """

    # --- NEW style ---
    if "selector_exists" in cond:
        sel = cond.get("selector_exists")
        return bool(sel and soup.select_one(sel))

    if "selector_missing" in cond:
        sel = cond.get("selector_missing")
        # Empty selector -> False (can't assert "missing" without a selector).
        return bool(sel) and (soup.select_one(sel) is None)

    if "selector_text_equals" in cond:
        cfg = cond.get("selector_text_equals") or {}
        sel = cfg.get("selector")
        txt = (cfg.get("text") or "").strip().lower()
        el = soup.select_one(sel) if sel else None
        # Exact (case-insensitive, whitespace-normalized) equality of the
        # element's visible text.
        return bool(el and el.get_text(" ", strip=True).strip().lower() == txt)

    if "selector_contains" in cond and isinstance(cond["selector_contains"], dict):
        # The isinstance guard keeps legacy {"type": "selector_contains", ...}
        # rules (where the key holds a string, not a config dict) flowing
        # through to the legacy dispatch below.
        cfg = cond["selector_contains"]
        sel = cfg.get("selector")
        txt = (cfg.get("text") or "").lower()
        el = soup.select_one(sel) if sel else None
        return bool(el and txt in el.get_text(" ", strip=True).lower())

    # New helpers for robustness across HTML variants
    if "selector_exists_any" in cond:
        sels = _as_list(cond.get("selector_exists_any"))
        return any(s and soup.select_one(s) for s in sels)

    if "selector_missing_all" in cond:
        # Note: vacuously True for an empty selector list.
        sels = _as_list(cond.get("selector_missing_all"))
        return all(s and (soup.select_one(s) is None) for s in sels)

    if "text_contains" in cond:
        # Accepts a single string or a list; any match wins.
        needles = _as_list(cond.get("text_contains"))
        return _text_contains(html, needles)

    if "text_contains_any" in cond:
        # Alias of text_contains with explicit "any" semantics.
        needles = _as_list(cond.get("text_contains_any"))
        return _text_contains(html, needles)

    if "field_empty" in cond:
        if extracted is None:
            return False
        key = cond.get("field_empty")
        val = extracted.get(key)
        # "Empty" means: missing/None, blank string, or empty list/dict.
        if val is None:
            return True
        if isinstance(val, str) and val.strip() == "":
            return True
        if isinstance(val, (list, dict)) and not val:
            return True
        return False

    if "field_missing" in cond:
        if extracted is None:
            return False
        key = cond.get("field_missing")
        # Treats an explicit None value the same as an absent key.
        return extracted.get(key) is None

    if "url_contains" in cond and page is not None:
        # NOTE(review): when page is None this falls through to the legacy
        # dispatch and ultimately returns False — confirm that is intended.
        frag = (cond.get("url_contains") or "").lower()
        return bool(frag and frag in (getattr(page, "url", "") or "").lower())

    # --- legacy style ---
    t = cond.get("type")
    if t == "text_contains":
        return _text_contains(html, _as_list(cond.get("text")))
    if t == "selector_missing":
        sel = cond.get("selector")
        return bool(sel) and (soup.select_one(sel) is None)
    if t == "selector_contains":
        sel = cond.get("selector")
        txt = (cond.get("text") or "").lower()
        el = soup.select_one(sel) if sel else None
        return bool(el and txt in el.get_text(" ", strip=True).lower())
    if t == "selector_text":
        sel = cond.get("selector")
        txt = (cond.get("text") or "").strip().lower()
        el = soup.select_one(sel) if sel else None
        return bool(el and el.get_text(" ", strip=True).strip().lower() == txt)
    if t == "source_field_match" and page is not None:
        field = cond.get("field")
        match = (cond.get("match") or "").lower()
        value = getattr(page, field, "")
        try:
            # Dict-valued fields are serialized so substring matching works.
            if isinstance(value, dict):
                value = json.dumps(value, ensure_ascii=False)
        except Exception:
            pass
        return bool(value and match in str(value).lower())

    # Unknown condition shape: fail closed.
    return False


def _normalize_rules(rules) -> list:
    """Normalize rules input to a list of dicts."""
    if not rules:
        return []
    if isinstance(rules, str):
        try:
            val = json.loads(rules)
        except Exception:
            return []
    else:
        val = rules
    if isinstance(val, dict):
        return [val]
    if isinstance(val, (list, tuple)):
        return list(val)
    return []


def is_inactive(
    html: str,
    soup: BeautifulSoup,
    rules,
    page=None,
    extracted: Optional[dict] = None,
) -> Tuple[bool, Optional[str]]:
    """
    Returns (inactive_bool, reason_str).

    NEW style rule:
      { "when": { "all":[...], "any":[...], "none":[...] }, "reason": "..." }

    LEGACY style: a single condition dict is treated as NEW with `any=[...]`.

    Empty rules -> (False, None).
    """
    html = html or ""  # tolerate None input

    def check(condition):
        return _cond_ok(condition, soup, html, page, extracted)

    for rule in _normalize_rules(rules):
        if not isinstance(rule, dict):
            continue

        when = rule.get("when")
        if isinstance(when, dict):
            # NEW style: every `all` must hold, at least one `any` (when the
            # list is non-empty), and no `none` condition may hold.
            required = _as_list(when.get("all"))
            alternatives = _as_list(when.get("any"))
            forbidden = _as_list(when.get("none"))
            matched = (
                all(check(c) for c in required)
                and (not alternatives or any(check(c) for c in alternatives))
                and not any(check(c) for c in forbidden)
            )
            if matched:
                return True, (rule.get("reason") or rule.get("name") or "rule_match")
            continue

        # LEGACY: the rule dict itself is a single condition.
        if check(rule):
            return True, (rule.get("reason") or rule.get("type") or "legacy_rule_match")

    return False, None
