#manual_agregator/parser/rules.py

# -*- coding: utf-8 -*-
"""
Elastyczne reguły wyboru gałęzi selektorów i ustawiania pól.

Predykaty (możesz używać w "when"):
- selector_exists:   "#css"
- selector_missing:  "#css"
- selector_contains: {"selector":"#css", "text":"fragment"}
- text_contains:     "fragment"
- url_matches:       "regex"
- field_empty:       "price"         (tylko w fazie POST – po wstępnym parsowaniu)
- field_equals:      {"field":"...", "value": ...}  (tylko w fazie POST)

Grupowanie:
when = { "all":[...], "any":[...], "none":[...] }

Działania:
- skrót:  {"type": "otodom_v2", "when": {...}}
- pełne:  {"when": {...}, "then": {"set_type": "otodom_v2", "set": {"k":"v"}}}

Zwraca (set_type:str|None, set_fields:dict)
"""

from __future__ import annotations
import re
from bs4 import BeautifulSoup

# -------------------- predykaty --------------------

def _pred_selector_exists(p, soup: BeautifulSoup, **_):
    """Return True when a CSS selector matches at least one element.

    Args:
        p: The selector as a string, or a dict carrying it under the
            ``"selector"`` key. Any other shape (``None``, list, number, ...)
            is treated as a malformed predicate.
        soup: Parsed document; only its ``select_one`` method is used.
        **_: Remaining evaluation context (``page``, ``extracted``); ignored.

    Returns:
        bool: True if ``soup.select_one`` finds an element. False when
        nothing matches or the selector is missing, empty or not a string.
    """
    sel = p.get("selector") if isinstance(p, dict) else p
    # A malformed argument must not crash rule evaluation; such a predicate
    # simply never matches.
    if not isinstance(sel, str) or not sel:
        return False
    return bool(soup.select_one(sel))

def _pred_selector_missing(p, soup: BeautifulSoup, **_):
    """Return True when a CSS selector matches no element in the document.

    Args:
        p: The selector as a string, or a dict carrying it under the
            ``"selector"`` key. Any other shape is treated as a malformed
            predicate.
        soup: Parsed document; only its ``select_one`` method is used.
        **_: Remaining evaluation context (``page``, ``extracted``); ignored.

    Returns:
        bool: True if ``soup.select_one`` finds nothing for a valid selector.
        False when an element is found, and also when the selector is
        missing, empty or not a string (a malformed predicate never matches).
    """
    sel = p.get("selector") if isinstance(p, dict) else p
    # Without a usable selector there is nothing to look for, so the
    # predicate does not match instead of raising.
    if not isinstance(sel, str) or not sel:
        return False
    return not soup.select_one(sel)

def _pred_selector_contains(p, soup: BeautifulSoup, **_):
    """Return True when the first element matching a selector contains a text.

    The comparison is case-insensitive and ignores leading/trailing
    whitespace of the searched fragment.

    Args:
        p: Dict with ``"selector"`` (CSS selector string) and ``"text"``
            (fragment to look for). A numeric fragment is converted with
            ``str()``; anything that is not a dict never matches.
        soup: Parsed document; only its ``select_one`` method is used.
        **_: Remaining evaluation context (``page``, ``extracted``); ignored.

    Returns:
        bool: True if the element exists and its text (as returned by
        ``get_text(" ", strip=True)``) contains the fragment. False for a
        missing element, an empty fragment or a malformed argument.
    """
    if not isinstance(p, dict):
        return False

    sel = p.get("selector")
    if not isinstance(sel, str) or not sel:
        return False

    raw = p.get("text") or ""
    # Only scalars make sense as a fragment; str() keeps numeric values
    # coming from JSON/YAML configs from crashing on .strip().
    if not isinstance(raw, (str, int, float)):
        return False
    needle = str(raw).strip().lower()
    if not needle:
        return False

    el = soup.select_one(sel)
    if not el:
        return False
    return needle in el.get_text(" ", strip=True).lower()

def _pred_text_contains(p, soup: BeautifulSoup, **_):
    """Return True when the whole document text contains a fragment.

    The comparison is case-insensitive and ignores leading/trailing
    whitespace of the searched fragment.

    Args:
        p: The fragment as a string, or a dict carrying it under the
            ``"text"`` key (mirrors the dict form accepted by the other
            predicates). A numeric fragment is converted with ``str()``.
        soup: Parsed document; only its ``get_text`` method is used.
        **_: Remaining evaluation context (``page``, ``extracted``); ignored.

    Returns:
        bool: True if the fragment occurs in ``soup.get_text(" ", strip=True)``.
        False for an empty fragment or an argument that is not a scalar.
    """
    raw = (p.get("text") if isinstance(p, dict) else p) or ""
    # Lists and other containers are malformed arguments; they never match
    # rather than raising on .strip().
    if not isinstance(raw, (str, int, float)):
        return False
    needle = str(raw).strip().lower()
    return bool(needle) and needle in soup.get_text(" ", strip=True).lower()

def _pred_url_matches(p, page=None, **_):
    """Return True when the page URL matches a regular expression.

    Args:
        p: The regex as a string, or a dict carrying it under the
            ``"pattern"`` key. Any other shape never matches.
        page: Object whose ``url`` attribute is searched; a missing object or
            attribute is treated as an empty URL.
        **_: Remaining evaluation context (``soup``, ``extracted``); ignored.

    Returns:
        bool: True if ``re.search(pattern, url)`` finds a match. False for a
        missing, non-string or syntactically invalid pattern.
    """
    pattern = p.get("pattern") if isinstance(p, dict) else p
    if not isinstance(pattern, str) or not pattern:
        return False

    url = getattr(page, "url", "") or ""
    if not isinstance(url, str):
        # Some clients expose the URL as an object; re.search needs text.
        url = str(url)

    try:
        return bool(re.search(pattern, url))
    except re.error:
        # An invalid regex in the configuration must not abort parsing.
        return False

def _pred_field_empty(p, extracted: dict, **_):
    """Return True when an already-extracted field has no meaningful value.

    A field counts as empty when it is absent or equal to one of
    ``None``, ``""``, ``[]``, ``{}``, ``"{}"``, ``"[]"``.

    Args:
        p: The field name as a string, or a dict carrying it under the
            ``"field"`` key.
        extracted: Mapping of fields parsed so far. Anything that is not a
            dict is treated as "nothing extracted yet".
        **_: Remaining evaluation context (``soup``, ``page``); ignored.

    Returns:
        bool: True if the named field is empty. False when it holds a value
        or when no valid field name was supplied.
    """
    key = p.get("field") if isinstance(p, dict) else p
    # Without a field name the lookup would hit extracted.get(None) and
    # report "empty" for a malformed predicate; make it never match instead.
    if not isinstance(key, str) or not key:
        return False

    fields = extracted if isinstance(extracted, dict) else {}
    value = fields.get(key)
    return value in (None, "", [], {}, "{}", "[]")

def _pred_field_equals(p, extracted: dict, **_):
    """Return True when an already-extracted field equals an expected value.

    Args:
        p: Dict with ``"field"`` (field name) and ``"value"`` (expected
            value). Anything that is not a dict never matches.
        extracted: Mapping of fields parsed so far. Anything that is not a
            dict is treated as "nothing extracted yet".
        **_: Remaining evaluation context (``soup``, ``page``); ignored.

    Returns:
        The result of comparing the stored value with ``"value"`` using
        ``==``; an absent field compares as ``None``. False when no valid
        field name was supplied.
    """
    if not isinstance(p, dict):
        return False

    key = p.get("field")
    # A predicate without a field name used to compare None == None and
    # match by accident; treat it as malformed instead.
    if not isinstance(key, str) or not key:
        return False

    fields = extracted if isinstance(extracted, dict) else {}
    return fields.get(key) == p.get("value")

# Dispatch table: predicate name as written in a rule's "when" clause ->
# function that evaluates it.
PREDICATES = dict(
    selector_exists=_pred_selector_exists,
    selector_missing=_pred_selector_missing,
    selector_contains=_pred_selector_contains,
    text_contains=_pred_text_contains,
    url_matches=_pred_url_matches,
    field_empty=_pred_field_empty,
    field_equals=_pred_field_equals,
)

def _eval_one(pred: dict, soup, page, extracted, phase: str):
    """Evaluate a single predicate written as a one-entry ``{name: argument}`` dict.

    Args:
        pred: The predicate; must be a dict with exactly one key.
        soup: Parsed document handed to the predicate function.
        page: Page object handed to the predicate function.
        extracted: Already-extracted fields handed to the predicate function.
        phase: Evaluation phase; in ``"pre"`` the field-based predicates are
            not evaluated.

    Returns:
        bool: The predicate's result. False when ``pred`` is malformed, names
        an unknown predicate, or is field-based and the phase is ``"pre"``.
    """
    well_formed = isinstance(pred, dict) and len(pred) == 1
    if not well_formed:
        return False

    [(name, arg)] = pred.items()
    handler = PREDICATES.get(name)
    if not handler:
        return False

    # Field-based predicates depend on values that only exist after the
    # initial parse, so they are skipped in the "pre" phase.
    depends_on_fields = name in ("field_empty", "field_equals")
    if depends_on_fields and phase == "pre":
        return False

    outcome = handler(arg, soup=soup, page=page, extracted=extracted)
    return bool(outcome)

def _eval_when(when: dict | None, soup, page, extracted, phase: str):
    """Decide whether a rule's ``when`` condition holds.

    ``when`` may group predicates under ``"all"`` (every one must hold),
    ``"any"`` (at least one must hold; an empty group is ignored) and
    ``"none"`` (no predicate may hold). A known predicate name used directly
    as a key, e.g. ``{"selector_exists": "#css"}``, is evaluated as a member
    of ``"all"`` instead of being silently ignored. Keys that are neither a
    group nor a known predicate are ignored.

    Args:
        when: The condition dict, or a falsy value for "no condition".
        soup: Parsed document passed through to the predicates.
        page: Page object passed through to the predicates.
        extracted: Already-extracted fields passed through to the predicates.
        phase: Evaluation phase passed through to ``_eval_one``.

    Returns:
        bool: True for a missing/empty condition or when all groups are
        satisfied; False otherwise, including when ``when`` is not a dict.
    """
    if not when:
        return True
    if not isinstance(when, dict):
        # A malformed condition must neither crash nor match everything.
        return False

    group_names = ("all", "any", "none")

    def members(name):
        # Tolerate a missing/None group and a lone value given without a
        # list; _eval_one rejects anything that is not a valid predicate.
        raw = when.get(name) or ()
        return raw if isinstance(raw, (list, tuple)) else (raw,)

    def holds(pred):
        return _eval_one(pred, soup, page, extracted, phase)

    # Predicates written directly in "when" join the implicit "all" group.
    bare = [
        {key: arg}
        for key, arg in when.items()
        if key not in group_names and key in PREDICATES
    ]

    if not all(holds(pred) for pred in (*members("all"), *bare)):
        return False

    any_group = members("any")
    if any_group and not any(holds(pred) for pred in any_group):
        return False

    return not any(holds(pred) for pred in members("none"))

def apply_dynamic_rules(rules, soup, page, extracted, phase: str = "pre"):
    """Return the outcome of the first rule whose ``when`` condition holds.

    Rules are tried in order and evaluation stops at the first match, even
    if that rule sets neither a type nor any fields.

    Args:
        rules: List or tuple of rule dicts. Each rule is either the short
            form ``{"type": ..., "when": {...}}`` or the full form
            ``{"when": {...}, "then": {"set_type": ..., "set": {...}}}``.
            Entries that are not dicts are skipped.
        soup: Parsed document passed to the predicates.
        page: Page object passed to the predicates.
        extracted: Already-extracted fields passed to the predicates.
        phase: ``"pre"`` or ``"post"``.

    Returns:
        tuple: ``(override_type, extra_fields)``. ``override_type`` is
        ``then["set_type"]`` when truthy, otherwise the rule's ``"type"``
        (possibly ``None``). ``extra_fields`` is a copy of ``then["set"]``
        when that is a dict, otherwise ``{}``. ``(None, {})`` is returned
        when ``rules`` is not a list/tuple or no rule matches.
    """
    if not isinstance(rules, (list, tuple)):
        return None, {}

    for rule in rules:
        if not isinstance(rule, dict):
            continue
        if not _eval_when(rule.get("when") or {}, soup, page, extracted, phase):
            continue

        then = rule.get("then")
        if not isinstance(then, dict):
            # A missing or malformed "then" means: use the short form only.
            then = {}

        set_type = then.get("set_type") or rule.get("type")

        fields = then.get("set")
        # Copy so that callers mutating the result cannot alter the rule
        # configuration for subsequent pages.
        set_fields = dict(fields) if isinstance(fields, dict) else {}

        return set_type, set_fields

    return None, {}
