# -*- coding: utf-8 -*-
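"""Manual, selector-based parsing pipeline for NetworkMonitoredPage objects.

Pipeline: resolve a selectors branch via pre-rules, extract fields with type
handlers (isMain fields first), apply post-rules, copy transferred fields from
the page, compute extraction stats, evaluate inactivity (stored HTML plus a
live re-check), and save the result on the page.
"""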
import json, traceback
import requests
from bs4 import BeautifulSoup
from django.utils.timezone import now

from extractly.models import NetworkSourceError, NetworkMonitoredPage, SourceManual
from manual_agregator.notifications import send_alert_notification

from .utils import (
    value_is_empty,
    normalize_rules, normalize_selectors, map_data_to_manual_model
)
from .handlers import get_handler
from manual_agregator.parser.rules import apply_dynamic_rules  # pre/post rules (return (type, set_dict))
from manual_agregator.parser.transferred import apply_transferred, compute_stats  # NEW
# replaces the former: from .utils import check_inactive
from manual_agregator.parser.inactive import is_inactive

# ---------------------- internal helper ----------------------

def _parse_with_selectors(soup, selectors: dict, strict: bool):
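    """Extract every field defined in `selectors` from `soup`.

    Fields flagged `isMain` are parsed first so that dependent `fromMain`
    fields can read their values from `main_values`. Each field is delegated
    to the handler returned by `get_handler(field_type)` via
    handler.parse(field_name, config, soup, main_values, selectors, extracted).
    Returns `(extracted, main_values)`; in strict mode a missing value raises
    ValueError.
    """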
    extracted, main_values = {}, {}

    # 1) isMain first
    for field_name, config in (selectors or {}).items():
        if not isinstance(config, dict):
            continue
        if config.get("isMain"):
            field_type = "fromMain" if "fromMain" in config else config.get("fieldType", "text")
            handler = get_handler(field_type)
            try:
                val = handler.parse(field_name, config, soup, main_values, selectors, extracted)
            except Exception:
                val = None
            main_values[field_name] = val
            extracted[field_name] = val
            if strict and val is None:
                raise ValueError(f"Value not found (isMain): {field_name}")
            # compact debug: avoid dumping huge dicts that may include large inline JSON
            try:
                mv_keys = sorted(main_values)
                print(f"[isMain] {field_name} -> {('None' if val is None else 'ok')}; main_keys={mv_keys}")
            except Exception:
                pass

    # 2) the rest
    for field_name, config in (selectors or {}).items():
        if not isinstance(config, dict) or config.get("isMain"):
            continue
        field_type = "fromMain" if "fromMain" in config else config.get("fieldType", "text")
        handler = get_handler(field_type)
        try:
            val = handler.parse(field_name, config, soup, main_values, selectors, extracted)
        except Exception:
            val = None
        extracted[field_name] = val
        if strict and val is None:
            raise ValueError(f"Value not found: {field_name}")
        try:
            print(f"[field] {field_name} -> {('None' if val is None else 'ok')}")
        except Exception:
            pass

    return extracted, main_values

# ---------------------- main ----------------------

def parse_manual_data(page: NetworkMonitoredPage, strict=True) -> bool:
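    """Parse `page` using its source's SourceManual configuration.

    Steps: pre-rules pick a selectors branch, the branch is parsed with
    `_parse_with_selectors`, post-rules may switch the branch and trigger a
    re-parse, transferred fields are copied from the page, stats are computed,
    inactivity is evaluated (stored HTML, then a live fetch as fallback), and
    the result is saved on the page. Returns True on success, False otherwise.
    """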
    source = page.source
    try:
        manual_config: SourceManual = source.manual_data_source_fetcher
    except SourceManual.DoesNotExist:
        NetworkSourceError.objects.create(
            source=source,
            error_message="Brak ManualDataSource dla tego źródła.",
            error_type="MissingManualDataSource"
        )
        print(f"[ERROR] ManualDataSource does not exist for source {source}")
        return False

    try:
        print("=" * 80)
        print(f"[INFO] Parsing page: {page.url}")
        print(f"[INFO] First 500 characters of HTML:\n{(page.html or '')[:500]}\n")

        # Build both soups and pick the richer one; also merge text for text-based rules.
        full_html = page.html or ""
        sliced_html = getattr(page, "sliced_html", None) or ""
        merged_html = f"{full_html}\n{sliced_html}" if sliced_html else full_html

        soup_full = BeautifulSoup(full_html, "html.parser")
        soup_sliced = BeautifulSoup(sliced_html, "html.parser") if sliced_html else None
        soup = soup_sliced if (soup_sliced and len(soup_sliced.get_text()) > len(soup_full.get_text())) else soup_full

        # -------- 1) PRE phase: decide selectors branch from DOM/URL ----------
        extracted = {}
        pre_type, pre_set = apply_dynamic_rules(
            normalize_rules(getattr(manual_config, "rules", None)),
            soup, page, extracted, phase="pre"
        )
        if pre_set:
            extracted.update(pre_set)

        all_sel_root = normalize_selectors(getattr(manual_config, "selectors", None))
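
        # `all_sel_root` may be a branch map keyed by type or a flat fields map.
        # Illustrative shapes (field names and selectors below are hypothetical):
        #   branch map: {"apartment": {"price": {"fieldType": "text", "selector": ".price"}}, "default": {...}}
        #   flat map:   {"price": {"fieldType": "text", "selector": ".price", "isMain": True}}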

        # resolve branch
        selected_type = pre_type
        if isinstance(all_sel_root, dict):
            # map type -> selectors OR flat fields-map
            if selected_type and selected_type in all_sel_root and isinstance(all_sel_root[selected_type], dict):
                selectors = all_sel_root[selected_type]
            else:
                # fallback: 'default' or flat map
                looks_flat = _looks_like_fields_map(all_sel_root)
                if "default" in all_sel_root and isinstance(all_sel_root["default"], dict):
                    selectors = all_sel_root["default"]
                    selected_type = selected_type or "default"
                elif looks_flat:
                    selectors = all_sel_root
                    selected_type = selected_type or "flat"
                else:
                    # single available branch?
                    if len(all_sel_root) == 1:
                        k, v = next(iter(all_sel_root.items()))
                        selectors = v if isinstance(v, dict) else {}
                        selected_type = k
                    else:
                        selectors = {}
                        selected_type = None
                if pre_type and pre_type not in all_sel_root:
                    print(f"[WARN] Type '{pre_type}' not in selectors; using fallback '{selected_type or 'unknown'}'.")
        else:
            selectors = all_sel_root  # edge case

        if not selectors:
            print("[ERROR] No usable selectors resolved. Skipping.")
            return False

        print(f"[INFO] Using selectors (type: {selected_type or 'default'}) -> {list(selectors.keys())}")

        # -------- 2) Parse with selected branch --------------------------------
        parsed_now, main_values = _parse_with_selectors(soup, selectors, strict)
        extracted.update(parsed_now)

        # -------- 3) POST phase: rules depending on parsed content -------------
        post_type, post_set = apply_dynamic_rules(
            normalize_rules(getattr(manual_config, "rules", None)),
            soup, page, extracted, phase="post"
        )
        if post_set:
            extracted.update(post_set)

        # Optional re-parse if branch switched after POST
        if post_type and post_type != selected_type and isinstance(all_sel_root, dict) and isinstance(all_sel_root.get(post_type), dict):
            print(f"[INFO] Post-rule matched -> switching to '{post_type}' and re-parsing")
            selected_type = post_type
            selectors = all_sel_root[post_type]
            reparsed, main_values = _parse_with_selectors(soup, selectors, strict)
            # mirror the first-pass ordering so values set by pre/post rules survive the re-parse
            extracted = {**(pre_set or {}), **reparsed, **(post_set or {})}

        # -------- 3.5) TRANSFERRED: copy fields from Page ----------------------
        transferred_keys = apply_transferred(
            page, manual_config, extracted,
            skip_if_present=True,
            overwrite=False
        )

        # -------- 4) Stats -----------------------------------------------------
        extracted["_stats"] = compute_stats(
            selectors=selectors,
            extracted=extracted,
            selected_type=selected_type,
            extra_exclude=set(transferred_keys)
        )
        print(f"[INFO] Stats: {extracted['_stats']}")

        # -------- 5) Inactivity check ------------------------------------------
        inactive_rules = getattr(manual_config, "inactive", []) or []
        inactive, reason = is_inactive(
            html=merged_html,
            soup=soup,
            rules=inactive_rules,
            page=page,
            extracted=extracted
        )

        # also record the activity state in extracted
        extracted["is_active"] = not inactive

        if inactive:
            page.is_active = False
            page.inactive_date = now()
            page.meta = page.meta or {}
            page.meta["inactive_reason"] = reason
            print(f"[INACTIVE] Ad is not active {page.url} {reason}")
        else:
            # Live fallback: if not marked inactive from stored HTML, fetch current page and re-evaluate.
            try:
                resp = requests.get(page.url, timeout=20, headers={
                    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0 Safari/537.36"
                })
                live_html = resp.text or ""
                if live_html:
                    soup_live = BeautifulSoup(live_html, "html.parser")
                    inactive_live, reason_live = is_inactive(
                        html=live_html,
                        soup=soup_live,
                        rules=inactive_rules,
                        page=page,
                        extracted=extracted,
                    )
                    if inactive_live:
                        extracted["is_active"] = False
                        page.is_active = False
                        page.inactive_date = now()
                        page.meta = page.meta or {}
                        page.meta["inactive_reason"] = reason_live
                        page.meta["inactive_via"] = "live_check"
                        print(f"[INACTIVE-LIVE] Ad became inactive {page.url} {reason_live}")
            except Exception:
                # Network issues should not break parsing pipeline
                pass

        # -------- 6) Save ------------------------------------------------------
        page.raw_data = json.dumps(extracted, ensure_ascii=False)
        page.parse_data = extracted
        page.is_complete = True
        page.save()

        print(f"[SUCCESS] Data parsed and saved for page: {page.url}\n{'=' * 80}")
        return True

    except Exception:
        print(f"[EXCEPTION] Error while parsing page: {page.url}\n{traceback.format_exc()}")
        NetworkSourceError.objects.create(
            source=source,
            error_message=traceback.format_exc(),
            error_type="ManualParsingError",
        )
        send_alert_notification(
            f"Parsing error for listing {page.url} ({source.title})\n\n{traceback.format_exc()}",
            subject=f"❌ Parsing error: {source.title}",
        )
        return False


# ———— small helper: is this a flat fields map (and not a dict of branches)? ————
def _looks_like_fields_map(d: dict) -> bool:
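    """Heuristic: True if `d` is a flat map of field configs rather than a dict of branches."""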
    if not isinstance(d, dict):
        return False
    # if values look like field configs, treat as flat fields map
    for v in d.values():
        if isinstance(v, dict) and ("fieldType" in v or "selector" in v or "isMain" in v or "fromMain" in v):
            return True
    return False
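
# Typical invocation (a sketch only; the surrounding task/queue wiring is assumed,
# and the queryset filter below is purely illustrative):
#   for page in NetworkMonitoredPage.objects.filter(is_complete=False):
#       parse_manual_data(page, strict=False)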
