# -*- coding: utf-8 -*-
"""
Inactive rules evaluator (NEW + LEGACY) z trace.

Obsługiwane warunki (pojedynczy dict):
  NEW:
    - selector_exists: "css"
    - selector_missing: "css"
    - selector_text_equals: { "selector": "css", "text": "..." }
    - selector_contains: { "selector": "css", "text": "..." }
    - selector_exists_any: ["css1", "css2", ...]
    - selector_missing_all: ["css1", "css2", ...]
    - text_contains: "..." | ["...", "..."]
    - text_contains_any: ["...", "..."]
    - field_empty: "name"          (czyta z `extracted`)
    - field_missing: "name"        (czyta z `extracted`)
    - url_contains: "..."          (czyta z param. `url`)
    - http_status_in: [404, 410]   (czyta z param. `http_status`)
    - redirect_url_contains_any: ["nie-znaleziono","ad-not-available"] (czyta z `redirects`|`url`)
    - redirected: true/false       (czyta z `redirects`)

  LEGACY (pojedynczy dict z "type"):
    - { "type": "text_contains", "text": "..." }
    - { "type": "selector_missing", "selector": "..." }
    - { "type": "selector_contains", "selector": "...", "text": "..." }
    - { "type": "selector_text", "selector": "...", "text": "..." }
    - { "type": "source_field_match", "field": "...", "match": "..." }

Top-level:
  - NOWY: { "when": { "all":[...], "any":[...], "none":[...] }, "reason":"..." }
  - LEGACY: pojedynczy condition-dict (traktowany jak NEW z any=[...]).
"""

from __future__ import annotations

import json
import unicodedata
from typing import Any, Dict, Iterable, List, Optional, Tuple

from bs4 import BeautifulSoup


# ---------- utils ----------

def _as_list(x):
    """Coerce *x* to a list.

    ``None`` becomes an empty list, a list or tuple is copied into a new
    list, and any other value is wrapped in a one-element list.
    """
    if isinstance(x, (list, tuple)):
        return list(x)
    return [] if x is None else [x]


def _normalize_ascii(s: str) -> str:
    """Return a lowercase, accent-stripped, whitespace-collapsed copy of *s*.

    The text is lowercased, decomposed with NFKD, stripped of combining
    marks, and runs of whitespace are collapsed to single spaces (leading and
    trailing whitespace is dropped). A falsy *s* yields ``""``.

    Characters that have no Unicode decomposition (for example "ł") are left
    unchanged, so the result is not guaranteed to be pure ASCII.
    """
    lowered = s.lower() if s else ""
    decomposed = unicodedata.normalize("NFKD", lowered)
    base_chars = [ch for ch in decomposed if unicodedata.combining(ch) == 0]
    words = "".join(base_chars).split()
    return " ".join(words)


def _text_contains(haystack: Optional[str], needles: Iterable[str]) -> bool:
    """Return True if any needle occurs in *haystack*, ignoring case and accents.

    Each needle is first tried as a plain case-insensitive substring. If that
    fails, both sides are compared in their ``_normalize_ascii`` form
    (accents stripped, whitespace collapsed).

    Args:
        haystack: Text to search. A falsy value never matches.
        needles: Candidate phrases. Falsy and whitespace-only entries are
            ignored; non-string entries are compared via ``str()``.

    Returns:
        True on the first needle that matches, otherwise False.
    """
    if not haystack:
        return False

    low = str(haystack).lower()
    # The haystack can be a whole HTML document, so its normalized form is
    # only built if some needle fails the cheap raw substring check.
    low_norm: Optional[str] = None

    for n in needles:
        if not n:
            continue
        n_low = str(n).lower()
        # A whitespace-only needle normalizes to "", and "" is a substring of
        # every string, so it would match any page. Treat it as empty.
        if not n_low.strip():
            continue
        if n_low in low:
            return True

        n_norm = _normalize_ascii(n_low)
        if not n_norm:
            # e.g. a needle made only of combining marks
            continue
        if low_norm is None:
            low_norm = _normalize_ascii(low)
        if n_norm in low_norm:
            return True
    return False


def _safe_soup(html: str | None) -> BeautifulSoup:
    """Parse *html* into a BeautifulSoup tree.

    The "lxml" parser is tried first; if constructing the tree with it raises
    any exception, the markup is parsed again with "html.parser". A falsy
    *html* is parsed as an empty string.
    """
    markup = html if html else ""
    try:
        tree = BeautifulSoup(markup, "lxml")
    except Exception:
        tree = BeautifulSoup(markup, "html.parser")
    return tree


# ---------- single-condition evaluator ----------

def _cond_ok(
    cond: Dict[str, Any],
    soup: BeautifulSoup,
    html: str,
    *,
    url: Optional[str] = None,
    http_status: Optional[int] = None,
    redirects: Optional[List[str]] = None,
    extracted: Optional[dict] = None,
    trace: bool = False,
    debug: List[str] | None = None,
) -> bool:
    """Evaluate one condition dict against the page and the request context.

    NEW-style conditions are recognised by the presence of a known key; the
    keys are tested in the order they appear below and the first one present
    decides the result. If no NEW-style key is present, the LEGACY form is
    dispatched on ``cond["type"]``.

    Args:
        cond: The condition. Anything that is not a dict never matches.
        soup: Parsed document, used by the selector-based conditions.
        html: Raw markup, searched by the text-based conditions.
        url: Used by ``url_contains`` and ``redirect_url_contains_any``
            (and by the legacy ``source_field_match`` lookup).
        http_status: Compared against ``http_status_in``.
        redirects: URL chain used by ``redirected`` and
            ``redirect_url_contains_any``.
        extracted: Mapping read by ``field_empty`` / ``field_missing``; when
            it is None those conditions do not match.
        trace: When true, a line is appended to *debug* for each condition
            that holds (and for selector errors in ``selector_exists_any``).
        debug: List receiving the trace lines. When None the lines are
            written to a throwaway list.

    Returns:
        True if the condition holds, False otherwise (including for an
        unrecognised condition).
    """
    # A malformed entry (string, None, number, ...) can never match. Without
    # this guard the ``in`` tests below would turn into substring checks on a
    # string and then fail on ``.get``.
    if not isinstance(cond, dict):
        return False

    dbg = debug if debug is not None else []

    # NEW style
    if "selector_exists" in cond:
        sel = cond.get("selector_exists")
        ok = bool(sel and soup.select_one(sel))
        if trace and ok:
            dbg.append(f"[when.selector_exists] {sel}")
        return ok

    if "selector_missing" in cond:
        sel = cond.get("selector_missing")
        # An empty selector is treated as "not matched" rather than "missing".
        ok = bool(sel) and (soup.select_one(sel) is None)
        if trace and ok:
            dbg.append(f"[when.selector_missing] {sel}")
        return ok

    if "selector_text_equals" in cond:
        cfg = cond.get("selector_text_equals") or {}
        sel = cfg.get("selector")
        txt = (cfg.get("text") or "").strip().lower()
        el = soup.select_one(sel) if sel else None
        # Case-insensitive comparison of the element's whole visible text.
        ok = bool(el and el.get_text(" ", strip=True).strip().lower() == txt)
        if trace and ok:
            dbg.append(f"[when.selector_text_equals] {sel} == '{txt}'")
        return ok

    # Only the dict form is NEW-style; any other value falls through to the
    # LEGACY dispatch below.
    if "selector_contains" in cond and isinstance(cond["selector_contains"], dict):
        cfg = cond["selector_contains"]
        sel = cfg.get("selector")
        txt = (cfg.get("text") or "").lower()
        el = soup.select_one(sel) if sel else None
        ok = bool(el and txt in el.get_text(" ", strip=True).lower())
        if trace and ok:
            dbg.append(f"[when.selector_contains] {sel} ~ '{txt}'")
        return ok

    if "selector_exists_any" in cond:
        sels = _as_list(cond.get("selector_exists_any"))
        for s in sels:
            # A selector that raises is reported in the trace and skipped so
            # the remaining selectors still get a chance.
            try:
                if s and soup.select_one(s):
                    if trace:
                        dbg.append(f"[when.selector_exists_any] {s}")
                    return True
            except Exception as e:
                if trace:
                    dbg.append(f"[when.selector_exists_any] error {s}: {e}")
        return False

    if "selector_missing_all" in cond:
        sels = _as_list(cond.get("selector_missing_all"))
        ok = all(s and (soup.select_one(s) is None) for s in sels)
        if trace and ok:
            dbg.append(f"[when.selector_missing_all] {sels}")
        return ok

    if "text_contains" in cond:
        needles = _as_list(cond.get("text_contains"))
        ok = _text_contains(html, needles)
        if trace and ok:
            dbg.append(f"[when.text_contains] {needles}")
        return ok

    if "text_contains_any" in cond:
        needles = _as_list(cond.get("text_contains_any"))
        ok = _text_contains(html, needles)
        if trace and ok:
            dbg.append(f"[when.text_contains_any] {needles}")
        return ok

    if "field_empty" in cond:
        if extracted is None:
            return False
        key = cond.get("field_empty")
        val = extracted.get(key)
        # "Empty" means: absent/None, a blank string, or an empty list/dict.
        ok = (val is None) or (isinstance(val, str) and not val.strip()) or (isinstance(val, (list, dict)) and not val)
        if trace and ok:
            dbg.append(f"[when.field_empty] {key}")
        return ok

    if "field_missing" in cond:
        if extracted is None:
            return False
        key = cond.get("field_missing")
        ok = extracted.get(key) is None
        if trace and ok:
            dbg.append(f"[when.field_missing] {key}")
        return ok

    if "url_contains" in cond:
        frag = (cond.get("url_contains") or "").lower()
        ok = bool(frag and url and frag in url.lower())
        if trace and ok:
            dbg.append(f"[when.url_contains] '{frag}' in {url}")
        return ok

    if "http_status_in" in cond:
        codes = _as_list(cond.get("http_status_in"))
        ok = http_status in codes if http_status is not None else False
        if trace and ok:
            dbg.append(f"[when.http_status_in] {http_status}")
        return ok

    if "redirected" in cond:
        want = bool(cond.get("redirected"))
        # "Redirected" means the redirect chain has more than one entry; the
        # final `url` is not compared against the chain.
        redirected_now = bool(redirects and len(redirects) > 1)
        ok = (redirected_now is want)
        if trace and ok:
            dbg.append(f"[when.redirected] {redirected_now}")
        return ok

    if "redirect_url_contains_any" in cond:
        # Empty/None phrases are dropped: "" is a substring of every URL and
        # would make the condition match unconditionally.
        needles = [
            str(ph).lower()
            for ph in _as_list(cond.get("redirect_url_contains_any"))
            if ph
        ]
        # Check every URL of the redirect chain plus the final url.
        urls_to_check = list(redirects or [])
        if url:
            urls_to_check.append(url)
        for u in urls_to_check:
            u_low = (u or "").lower()
            if any(ph in u_low for ph in needles):
                if trace:
                    dbg.append(f"[when.redirect_url_contains_any] hit in {u}")
                return True
        return False

    # LEGACY style
    t = cond.get("type")
    if t == "text_contains":
        ok = _text_contains(html, _as_list(cond.get("text")))
        if trace and ok:
            dbg.append(f"[legacy.text_contains] {cond.get('text')}")
        return ok

    if t == "selector_missing":
        sel = cond.get("selector")
        ok = bool(sel) and (soup.select_one(sel) is None)
        if trace and ok:
            dbg.append(f"[legacy.selector_missing] {sel}")
        return ok

    if t == "selector_contains":
        sel = cond.get("selector")
        txt = (cond.get("text") or "").lower()
        el = soup.select_one(sel) if sel else None
        ok = bool(el and txt in el.get_text(" ", strip=True).lower())
        if trace and ok:
            dbg.append(f"[legacy.selector_contains] {sel} ~ '{txt}'")
        return ok

    if t == "selector_text":
        sel = cond.get("selector")
        txt = (cond.get("text") or "").strip().lower()
        el = soup.select_one(sel) if sel else None
        ok = bool(el and el.get_text(" ", strip=True).strip().lower() == txt)
        if trace and ok:
            dbg.append(f"[legacy.selector_text] {sel} == '{txt}'")
        return ok

    if t == "source_field_match":
        field = cond.get("field")
        match = (cond.get("match") or "").lower()
        value = None
        # NOTE(review): this looks `field` up as an attribute of the `url`
        # argument, which is a string here, so it only yields a value when
        # `field` happens to name a str attribute. Presumably a source object
        # was intended - confirm before relying on this condition.
        try:
            value = getattr(url, field, "")
        except Exception:
            value = ""
        try:
            if isinstance(value, dict):
                value = json.dumps(value, ensure_ascii=False)
        except Exception:
            pass
        ok = bool(value and match in str(value).lower())
        if trace and ok:
            dbg.append(f"[legacy.source_field_match] {field} ~ '{match}'")
        return ok

    return False


def _normalize_rules(rules) -> list:
    """Turn a rules configuration into a list of rule entries.

    A string is decoded as JSON first (an undecodable string yields an empty
    list). A dict is wrapped in a one-element list, a list or tuple is copied
    into a new list, and anything else - including any falsy input - yields
    an empty list.
    """
    if not rules:
        return []

    parsed = rules
    if isinstance(rules, str):
        try:
            parsed = json.loads(rules)
        except Exception:
            return []

    if isinstance(parsed, dict):
        return [parsed]
    return list(parsed) if isinstance(parsed, (list, tuple)) else []


# ---------- PUBLIC API ----------

def is_inactive(
    html: str,
    inactive_config,
    *,
    url: Optional[str] = None,
    http_status: Optional[int] = None,
    redirects: Optional[List[str]] = None,
    extracted: Optional[dict] = None,
    trace: bool = False,
) -> Tuple[bool, str] | Tuple[bool, str, List[str]]:
    """Decide whether a page matches any of the configured "inactive" rules.

    Rules are evaluated in order and the first matching rule wins. A rule
    with a dict under ``"when"`` is NEW-style: every ``all`` condition must
    hold, at least one ``any`` condition must hold (if any are listed), and
    no ``none`` condition may hold. Any other dict is treated as a single
    LEGACY condition. Non-dict rules are ignored, and a NEW-style rule that
    lists no conditions at all is skipped instead of matching every page.

    Args:
        html: Page markup; a falsy value is treated as an empty string.
        inactive_config: Rules as a dict, a list/tuple of dicts, or a JSON
            string encoding either.
        url: Final URL of the request, forwarded to the condition evaluator.
        http_status: HTTP status code, forwarded to the condition evaluator.
        redirects: Redirect URL chain, forwarded to the condition evaluator.
        extracted: Extracted fields, forwarded to the condition evaluator.
        trace: When true, a list of debug lines is appended to the result.

    Returns:
        ``(is_inactive, reason)`` when *trace* is false, or
        ``(is_inactive, reason, debug)`` when *trace* is true.
    """
    html = html or ""
    rules_list = _normalize_rules(inactive_config)
    debug: List[str] = []

    # The document is identical for every rule, so it is parsed at most once
    # (on the first dict rule) instead of once per rule.
    soup: Optional[BeautifulSoup] = None

    def holds(cond) -> bool:
        # Single place that forwards the shared context to the evaluator.
        return _cond_ok(cond, soup, html, url=url, http_status=http_status,
                        redirects=redirects, extracted=extracted,
                        trace=trace, debug=debug)

    for idx, rule in enumerate(rules_list):
        if not isinstance(rule, dict):
            continue
        if soup is None:
            soup = _safe_soup(html)

        # NEW style: when{all/any/none}
        when = rule.get("when")
        if isinstance(when, dict):
            all_list = _as_list(when.get("all"))
            any_list = _as_list(when.get("any"))
            none_list = _as_list(when.get("none"))

            # With no conditions at all, every group is vacuously satisfied
            # and the rule would flag every page; an empty LEGACY rule never
            # matches either, so skip it.
            if not (all_list or any_list or none_list):
                if trace:
                    debug.append(f"[when] rule_{idx} has no conditions => skipped")
                continue

            all_ok = all(holds(c) for c in all_list)
            # An absent/empty "any" group does not constrain the rule.
            any_ok = any(holds(c) for c in any_list) if any_list else True
            none_ok = not any(holds(c) for c in none_list)

            if all_ok and any_ok and none_ok:
                reason = rule.get("reason") or rule.get("name") or f"rule_{idx}_match"
                if trace:
                    debug.append(f"[when] matched => reason='{reason}'")
                    return True, reason, debug
                return True, reason
            continue

        # LEGACY: the rule dict itself is the single condition.
        if holds(rule):
            reason = rule.get("reason") or rule.get("type") or "legacy_rule_match"
            if trace:
                debug.append(f"[legacy] matched => reason='{reason}'")
                return True, reason, debug
            return True, reason

    no_match = (False, "Brak oznak nieaktywności")
    return (*no_match, debug) if trace else no_match
