# image_agregator/tasks.py
from __future__ import annotations

import json
import logging
from typing import Iterable, List, Optional

from celery import shared_task
from celery.exceptions import Retry
from django.core.cache import cache
from django.db.models import Q

from extractly.models import AdsManual
from houslyspace.tasks import task_send_ads_to_cloud
from image_agregator.images import store_main_image

logger = logging.getLogger(__name__)

# ─────────────────────────── Helpers ───────────────────────────
def _normalize_candidates(value) -> List[str]:
    """Normalize candidates from str(list/CSV)/list/tuple to a clean list of http(s) URLs."""
    urls: List[str] = []
    if not value:
        return urls

    if isinstance(value, (list, tuple)):
        urls = list(value)
    elif isinstance(value, str):
        s = value.strip()
        # JSON list in string?
        if s.startswith("[") and s.endswith("]"):
            try:
                loaded = json.loads(s)
                if isinstance(loaded, list):
                    urls = loaded
            except Exception:
                # fall back to CSV
                urls = [u.strip() for u in s.split(",")]
        else:
            # CSV or single URL
            urls = [u.strip() for u in s.split(",")]
    else:
        # last resort
        urls = [str(value).strip()]

    # keep only http(s) and non-empty
    urls = [u for u in urls if u and u.lower().startswith(("http://", "https://"))]
    return urls


def _enqueue_send_to_cloud_safely(log_suffix: str, ad_id: int):
    """Fire-and-forget: schedule the cloud-sync pipeline task, never raising.

    Failures are logged at WARNING with `log_suffix` and `ad_id` for context.
    """
    payload = {"mode": "pipeline", "limit": 150}
    try:
        task_send_ads_to_cloud.apply_async(kwargs=payload, queue="default")
    except Exception as exc:
        logger.warning("send_to_cloud enqueue failed (%s) ad_id=%s: %s", log_suffix, ad_id, exc)


# ───────────────────────── Backfill task ───────────────────────
@shared_task(queue="images")
def backfill_main_images(*, limit: int = 1000, ad_ids: Optional[Iterable[int]] = None, include_inactive: bool = False):
    """
    Backfill main images for ads.

    If ad_ids are provided -> process exactly those records (no `limit` applied).
    Otherwise backfill records missing images (light filters), up to `limit`.

    Returns the number of ads for which store_main_image() succeeded.
    """
    # Materialize the iterable exactly once. A generator passed as ad_ids would
    # otherwise be consumed by the first filter() call while still looking
    # truthy on a later `if not ad_ids` check; after list() an empty generator
    # behaves the same as an empty list (falls through to the backfill branch).
    ids = list(ad_ids) if ad_ids is not None else None

    if ids:
        qs = list(AdsManual.objects.filter(pk__in=ids))
    else:
        # Keep filters consistent with single-id task:
        # - DO NOT filter by check_active here; allow both True/False (consistency).
        base_qs = (
            AdsManual.objects
            .filter(Q(original_image_urls__isnull=False))
            .exclude(Q(processed_without_image=True))
            .filter(
                Q(images__isnull=True) |
                Q(images="") |
                Q(images={})
            )
            .order_by("id")
        )
        if not include_inactive:
            base_qs = base_qs.exclude(Q(is_active=False))
        qs = list(base_qs[:limit])

    ok = 0
    for obj in qs:
        try:
            if store_main_image(obj):
                ok += 1
        except Exception as e:
            # Best-effort batch: one bad record must not abort the whole run.
            logger.warning("backfill store_main_image error id=%s: %s", getattr(obj, "pk", "?"), e)
            continue
    return ok


# ────────────────────── Single-ID task (robust) ──────────────────────
@shared_task(
    bind=True,
    autoretry_for=(Exception,),
    retry_backoff=60,
    retry_jitter=True,
    retry_kwargs={"max_retries": 3},
    acks_late=True,
    queue="images",
)
def task_store_main_image(self, ad_id: int, overwrite: bool = False) -> dict:
    """
    More robust single-ID image store:
    - Consistent filters vs backfill (do not flip check_active logic)
    - Soft retry on early-miss conditions (record not yet visible, candidates not yet saved)
    - Lock to avoid races

    Returns a status dict: {"status": ..., "ad_id": ...} plus "url"/"error" where relevant.
    """
    # IMPORTANT: keep filters consistent; do not exclude by check_active differently than backfill
    ad = (
        AdsManual.objects
        .filter(pk=ad_id)
        .exclude(Q(is_active=False) | Q(processed_without_image=True))
        .first()
    )
    if not ad:
        # Early-miss: allow a short retry; record may not be visible yet post-commit.
        try:
            raise self.retry(countdown=5, exc=RuntimeError(f"ad_missing:{ad_id}"))
        except self.MaxRetriesExceededError:
            return {"status": "not_found", "ad_id": ad_id}

    # Per-ad lock so two workers never upload the same image concurrently.
    lock_key = f"img_upload_lock:{ad_id}"
    if not cache.add(lock_key, "1", timeout=300):
        return {"status": "locked", "ad_id": ad_id}

    try:
        if not overwrite:
            # `images` may be a dict ({"main": ...}) or a legacy string field.
            imgs = ad.images
            has_main = False
            if isinstance(imgs, dict):
                has_main = bool(imgs.get("main"))
            elif isinstance(imgs, str):
                has_main = bool(imgs.strip())

            if has_main:
                _enqueue_send_to_cloud_safely("skip_exists", ad_id)
                return {"status": "skip_exists", "ad_id": ad_id}

        # Candidates: prefer field on ad; fallback to related NetworkMonitoredPage if present
        candidates = _normalize_candidates(ad.original_image_urls)

        if not candidates:
            nmp = getattr(ad, "networkmonitoredpage", None)
            if nmp:
                pd = getattr(nmp, "parse_data", None) or {}
                # 'parse_data' may contain 'original_image_urls' JSON/list
                cand2 = _normalize_candidates(pd.get("original_image_urls"))
                # or legacy field 'image_links'
                if not cand2:
                    cand2 = _normalize_candidates(getattr(nmp, "image_links", None))
                if cand2:
                    candidates = cand2
                    # persist back to Ad for future fast-path
                    try:
                        ad.original_image_urls = cand2
                        ad.save(update_fields=["original_image_urls"])
                    except Exception as e:
                        logger.debug("Could not persist original_image_urls for ad_id=%s: %s", ad_id, e)

        if not candidates:
            # Early-miss: URLs may be written a moment later; allow a short retry
            try:
                raise self.retry(countdown=5, exc=RuntimeError(f"no_candidates:{ad_id}"))
            except self.MaxRetriesExceededError:
                return {"status": "no_candidates", "ad_id": ad_id}

        # Let store_main_image choose best candidate internally (or pass candidates if your impl expects)
        url = store_main_image(ad)
        if url:
            _enqueue_send_to_cloud_safely("ok", ad_id)
            return {"status": "ok", "ad_id": ad_id, "url": url}

        return {"status": "upload_none", "ad_id": ad_id}

    except Retry:
        # BUGFIX: self.retry() signals Celery by *raising* Retry. It must
        # propagate; the broad handler below used to swallow it, so the
        # "no_candidates" soft retry above never actually retried.
        raise
    except Exception as e:
        logger.warning("Image upload failed for ad_id=%s: %s", ad_id, e)
        return {"status": "error", "ad_id": ad_id, "error": str(e)}
    finally:
        # Always release the lock, including on retry and error paths.
        cache.delete(lock_key)
