import asyncio
import time
import uuid
from datetime import datetime
from urllib.parse import urlparse

from asgiref.sync import sync_to_async
from django.utils import timezone

from cloud_storage.services import upload_html_json_to_ovh
from extractly.models import NetworkMonitoredPage, SourceHtml
from html_agregator.utils.actions import run_actions_on_page
from html_agregator.utils.error_guard import check_and_normalize_html  # NEW!
from html_agregator.utils.process import get_pages_to_process
from html_agregator.utils.selectors import process_selectors, raw_data_cleaner
from link_agregator.check_active.inactive import is_inactive as INACTIVE
from link_agregator.utils.cookies import handle_cookies
from link_agregator.utils.logger import logger

paused = False

def to_serializable(val):
    """Recursively convert *val* into JSON-serializable primitives.

    ``uuid.UUID`` values become strings, ``datetime`` values become
    ISO-8601 strings, and containers (dict / list / tuple / set) are
    converted element-wise (tuples and sets come back as lists, which is
    what JSON encodes anyway).  Any other value is returned unchanged.

    Args:
        val: Arbitrary value, possibly a nested container.

    Returns:
        A structure of the same shape built only from JSON-friendly types.
    """
    if isinstance(val, uuid.UUID):
        return str(val)
    if isinstance(val, datetime):
        return val.isoformat()
    if isinstance(val, dict):
        return {k: to_serializable(v) for k, v in val.items()}
    # Tuples/sets must also be walked, otherwise a UUID or datetime nested
    # inside them would reach json.dumps unconverted and raise TypeError.
    if isinstance(val, (list, tuple, set)):
        return [to_serializable(x) for x in val]
    return val

async def fetch_and_save_html_for_pages(
    page,
    filter_kwargs=None,
    pages=None,
    enable="true",
    name=None,
    source_ids=None,
):
    """Fetch, persist and upload HTML for a batch of monitored pages.

    For every page record: load its per-source config (``SourceHtml``),
    refresh cookies when the base domain changes, navigate to the URL,
    detect broken/error HTML, run configured actions and selectors, check
    the offer's active status, clean the raw data, save the record and
    upload a JSON snapshot to OVH object storage.  Failures on one record
    are logged and skipped so the batch keeps going.

    Args:
        page: Browser page object (async API) used for navigation.
        filter_kwargs: Unused here; kept for interface compatibility with
            callers that pass it positionally.
        pages: Optional pre-fetched list of page records; when ``None`` the
            records are loaded via ``get_pages_to_process``.
        enable: Filter forwarded to ``get_pages_to_process``.
        name: Filter forwarded to ``get_pages_to_process``.
        source_ids: Filter forwarded to ``get_pages_to_process``.
    """
    if pages is None:
        qs = await get_pages_to_process(
            enable=enable, name=name, source_ids=source_ids
        )
        pages = await sync_to_async(list)(qs)

    logger.info(f"[HTML_FETCH] Rekordów do przetworzenia: {len(pages)}")

    base_url, cookies_ok = None, False

    for obj in pages:
        # Cooperative pause: await asyncio.sleep so the event loop keeps
        # running (time.sleep here would block every other coroutine).
        while paused:
            print("PAUZA – naciśnij 'p' aby wznowić")
            await asyncio.sleep(1)

        logger.info(f"[HTML_FETCH]  {obj.url}")

        try:
            # ------------------- Source configuration -------------------
            html_cfg = None
            if obj.source_id:
                html_cfg = await sync_to_async(
                    SourceHtml.objects.filter(
                        source_id=obj.source_id
                    ).first
                )()
            if not html_cfg:
                logger.warning("[HTML_FETCH] brak configu – pomijam")
                continue

            # ------------------- Cookie handling -------------------------
            # Re-run the cookie banner flow whenever the base domain changes
            # (or cookies were never accepted in this session).
            parsed = urlparse(obj.url)
            new_base = f"{parsed.scheme}://{parsed.netloc}/"
            if new_base != base_url or not cookies_ok:
                await page.goto(new_base, timeout=75000)
                await handle_cookies(page, getattr(obj, "meta", {}) or {})
                base_url, cookies_ok = new_base, True

            # ------------------- Page fetch ------------------------------
            response = await page.goto(obj.url, timeout=75000)
            http_status = response.status if response else None
            final_url = page.url
            redirects = []
            if response and hasattr(response, "request"):
                try:
                    # redirect_chain is not available on every driver, hence
                    # the hasattr guard and the best-effort fallback.
                    chain = (
                        response.request.redirect_chain
                        if hasattr(response.request, "redirect_chain")
                        else []
                    )
                    redirects = [req.url for req in chain] + [final_url]
                except Exception:
                    redirects = [final_url]

            if html_cfg.actions:
                await run_actions_on_page(page, html_cfg.actions)

            html = await page.content()

            # ------------------- Broken-HTML detection -------------------
            is_err, err_reason, norm_html = check_and_normalize_html(html, http_status=http_status)
            obj.html = norm_html
            if is_err:
                # Mark the record as fetched-but-broken and move on; no
                # selectors/parsing/upload for error pages.
                obj.sliced_html = "error"
                obj.meta = obj.meta or {}
                obj.meta["html_error"] = True
                obj.meta["html_error_reason"] = err_reason
                obj.is_fetched = True
                obj.date_fetched = timezone.now()
                await sync_to_async(obj.save)()
                logger.warning(f"[HTML_FETCH] HTML error detected: {err_reason}")
                continue

            # ------------------- Selectors -------------------------------
            if html_cfg.selectors:
                await process_selectors(obj, page, html_cfg.selectors, html)

            obj.date_fetched = timezone.now()

            # ------------------- Active-status check ---------------------
            inactive_cfg = html_cfg.inactive or []
            is_inactive, reason = INACTIVE(
                html=html,
                inactive_config=inactive_cfg,
                url=final_url,
                http_status=http_status,
                redirects=redirects,
            )
            obj.is_active = not is_inactive
            if is_inactive:
                obj.inactive_date = timezone.now()
                obj.meta = obj.meta or {}
                obj.meta["inactive_reason"] = reason

            # ------------------- Parsing ---------------------------------
            # Prefer the selector-sliced HTML; fall back to the full page.
            cleaned = raw_data_cleaner(obj.sliced_html or html)
            obj.raw_data = cleaned["raw_text"]
            obj.image_links = cleaned["image_links"]
            obj.parse_data = cleaned["parse_data"]
            obj.is_fetched = True

            await sync_to_async(obj.save)()

            # ------------------- JSON upload to OVH ----------------------
            json_data = {
                "id": str(obj.id),
                "url": obj.url,
                "html": obj.html,
                "raw_text": cleaned["raw_text"],
                "image_links": cleaned["image_links"],
                "parse_data": cleaned["parse_data"],
                "meta": to_serializable(obj.meta),
                "date_fetched": obj.date_fetched.isoformat(),
                "estate_type": obj.estate_type,
                "offer_type": obj.offer_type,
                "source": obj.source_id,
                "is_active": obj.is_active,
                "inactive_reason": obj.meta.get("inactive_reason") if obj.meta else None,
            }

            file_name = f"{obj.source_id}/{obj.id}.json"
            upload_html_json_to_ovh(json_data, obj.id, file_name)

        except Exception as exc:
            # Best-effort batch processing: one bad record must not abort
            # the rest of the batch.
            logger.warning(f"[HTML_FETCH] X {obj.url}: {exc}")
            continue
