#manual_agregator/parser/transferred.py

# -*- coding: utf-8 -*-
"""
Utilities for copying (transferring) fields from NetworkMonitoredPage
into the parser's extracted dict, based on a mapping stored on ManualDataSource.

Config lives on ManualDataSource as JSON in field "transferred" (preferred)
or legacy "trasferred". Example:

{
  "transferred": {
    "listing_url": "url",
    "source_id": "source_id",
    "source_name": "source.title",
    "offer_kind": "offer_type",
    "estate_kind": "estate_type",
    "inactive_reason": "meta.inactive_reason"
  }
}

Integration (in parse_manual_data):
    from manual_agregator.parser.transferred import apply_transferred, compute_stats

    # ... after selectors parsing + post-rules:
    transferred_keys = apply_transferred(page, manual_config, extracted, skip_if_present=True)

    # when computing stats, exclude transferred keys so stats reflect only selector-extracted fields:
    extracted["_stats"] = compute_stats(selectors, extracted, selected_type, extra_exclude=set(transferred_keys))
"""

from __future__ import annotations

import json
import logging
import uuid
from datetime import date, datetime, time
from decimal import Decimal
from typing import Any, Dict, Iterable, List, Tuple

from .utils import value_is_empty

logger = logging.getLogger(__name__)


def _parse_json_if_str(raw) -> dict:
    """Return dict from raw if it's already a dict; if it's a JSON string try to parse; otherwise {}."""
    if not raw:
        return {}
    if isinstance(raw, dict):
        return raw
    if isinstance(raw, str):
        try:
            parsed = json.loads(raw)
            return parsed if isinstance(parsed, dict) else {}
        except Exception:
            logger.debug("transferred: failed to json-load string mapping; ignoring")
            return {}
    return {}


def get_transferred_map(manual_config) -> dict:
    """
    Return the transfer mapping configured on `manual_config`.

    The preferred attribute "transferred" is tried first, then the legacy
    "trasferred". Each candidate is parsed before it is judged, so a preferred
    value that is present but unusable (invalid JSON, JSON that is not an object,
    an empty mapping) no longer hides a valid legacy mapping.

    Always returns a dict (possibly empty).
    """
    for field_name in ("transferred", "trasferred"):
        mapping = _parse_json_if_str(getattr(manual_config, field_name, None))
        if mapping:
            return mapping
    return {}


def deep_get(obj: Any, path: str) -> Any:
    """
    Resolve a dot-separated `path` against `obj`, one segment at a time.

    Dicts are read by key, anything else by attribute; a missing key or attribute
    resolves to None, and so does an empty path.
    Example: deep_get(page, "source.title") or deep_get(page, "meta.inactive_reason")
    """
    if not path:
        return None

    node = obj
    segments = str(path).split(".")
    for segment in segments:
        if node is None:
            # nothing left to descend into
            break
        node = node.get(segment) if isinstance(node, dict) else getattr(node, segment, None)
    return node


def to_serializable(val: Any) -> Any:
    """
    Make common Django/py types JSON-serializable for json.dumps(extracted).

    Conversions:
        - uuid.UUID                -> str
        - datetime / date / time   -> ISO 8601 string via isoformat()
        - Decimal                  -> str (keeps the exact decimal representation)
        - dict                     -> dict with converted values (keys are left as-is)
        - list / tuple             -> list with converted items

    Any other value is returned unchanged.
    """
    if isinstance(val, uuid.UUID):
        return str(val)
    # datetime is a subclass of date, so this one check covers all three types
    if isinstance(val, (datetime, date, time)):
        return val.isoformat()
    if isinstance(val, Decimal):
        return str(val)
    if isinstance(val, dict):
        return {k: to_serializable(v) for k, v in val.items()}
    if isinstance(val, (list, tuple)):
        return [to_serializable(x) for x in val]
    return val


def apply_transferred(
    page,
    manual_config,
    extracted: Dict[str, Any],
    *,
    skip_if_present: bool = True,
    overwrite: bool = False,
) -> List[str]:
    """
    Copy fields from NetworkMonitoredPage `page` into `extracted` according to mapping
    defined on ManualDataSource.transferred (or legacy trasferred).

    Each mapping entry is `destination key -> dot-path on page`. A path that resolves
    to None, or that raises while being resolved, is skipped. Values are passed
    through `to_serializable` before being stored.

    Args:
        page: NetworkMonitoredPage instance.
        manual_config: ManualDataSource instance.
        extracted: dict to update (mutated in place).
        skip_if_present (default True): do NOT overwrite keys that already have a non-empty value in `extracted`.
        overwrite (default False): hard-overwrite regardless of existing value (wins over skip_if_present).

    Returns:
        List of destination keys that were written (so the caller can exclude from stats).
    """
    mapping = get_transferred_map(manual_config)
    if not mapping:
        return []

    # Existing values are protected only when skipping is requested and no hard overwrite is.
    keep_existing = skip_if_present and not overwrite

    written: List[str] = []
    for dest, src_path in mapping.items():
        try:
            val = deep_get(page, src_path)
        except Exception:
            # Best effort: a broken path must not abort the whole transfer, but leave a
            # trace so a misconfigured mapping can be diagnosed.
            logger.debug(
                "transferred: failed to resolve %r for key %r; skipping",
                src_path,
                dest,
                exc_info=True,
            )
            continue

        if val is None:
            continue

        if keep_existing and dest in extracted and not value_is_empty(extracted.get(dest)):
            # keep the value already parsed by selectors
            continue

        extracted[dest] = to_serializable(val)
        written.append(dest)

    if written:
        logger.debug("transferred: wrote keys %s", written)

    return written


def compute_stats(
    selectors: Dict[str, Any],
    extracted: Dict[str, Any],
    selected_type: str | None,
    *,
    extra_exclude: Iterable[str] | None = None,
) -> Dict[str, Any]:
    """
    Summarize how many fields were extracted.

    The "_stats" key and every key in `extra_exclude` (e.g. transferred keys) are
    left out of all counts. `total_fields` and `missing_keys` are based on the keys
    of `selectors`; `found_keys` lists every non-excluded key of `extracted` that
    holds a non-empty value.
    """
    skipped = {"_stats"}
    if extra_exclude:
        skipped.update(extra_exclude)

    # selector-defined fields that take part in the stats
    selector_keys = [name for name in (selectors or {}).keys() if name not in skipped]

    found: List[str] = []
    for name, value in extracted.items():
        if name in skipped or value_is_empty(value):
            continue
        found.append(name)

    missing = [name for name in selector_keys if value_is_empty(extracted.get(name))]

    stats: Dict[str, Any] = {}
    stats["found_fields"] = len(found)
    stats["total_fields"] = len(selector_keys)
    stats["found_keys"] = found
    stats["missing_keys"] = missing
    stats["type_used"] = selected_type or "default"
    return stats
