
from __future__ import annotations    
import uuid
import jinja2
import json
from bs4 import BeautifulSoup
from asgiref.sync import sync_to_async
from django.contrib.contenttypes.models import ContentType
from link_agregator.utils.logger import logger  # logger z Twojego systemu
import re

async def get_value_by_action(page, selector, action="text_content"):
    """Locate *selector* on the Playwright *page* and extract a value per *action*.

    Supported actions:
        "text_content" (default)   – stripped text of the first match
        "inner_html" / "outer_html" – element markup
        "get_attribute:<name>"     – value of the named attribute
        "screenshot"               – saves a PNG under screenshots/ and returns its path
        "evaluate:<js>"            – result of running the JS snippet on the element

    Returns None when nothing matches the selector; any unrecognized action
    falls back to stripped text content.
    """
    matches = page.locator(selector)
    if await matches.count() == 0:
        return None

    element = matches.first

    if action == "inner_html":
        return await element.inner_html()
    if action == "outer_html":
        return await element.evaluate("e => e.outerHTML")
    if action.startswith("get_attribute:"):
        _, attr_name = action.split(":", 1)
        return await element.get_attribute(attr_name)
    if action == "screenshot":
        target = f"screenshots/{uuid.uuid4()}.png"
        await element.screenshot(path=target)
        return target
    if action.startswith("evaluate:"):
        _, expression = action.split(":", 1)
        return await element.evaluate(expression)

    # "text_content" and any unknown action both yield stripped text.
    text = await element.text_content()
    return (text or "").strip()



def _soup_extract(html, selector, action):
    """BeautifulSoup fallback: extract `selector` from *html* per *action*.

    Returns the element's raw markup for "inner_html", its stripped text for
    anything else, or "" when the selector matches nothing.
    """
    el = BeautifulSoup(html, "lxml").select_one(selector)
    if el is None:
        return ""
    return el.decode() if action == "inner_html" else el.text.strip()


async def process_selectors(obj, page, selectors_map, html, parent_extra=None):
    """Walk a selector configuration map and persist extracted values.

    Each entry of *selectors_map* is either:
      - a "merge" block (``{"merge": True, ...}``): optionally strips
        ``skip_selectors`` from *html* first; if it declares ``content_type``
        and ``content_field``, the sub-selectors are merged into one JSON
        string stored on *obj*; otherwise it recurses.
      - a nested block (has ``content_type`` or ``create_new``): delegated to
        ``process_nested_selector_block``.
      - a flat field spec: value is extracted via Playwright with a
        BeautifulSoup fallback, then written to a model row or to *obj*.

    Args:
        obj: Django model instance receiving values (saved at the end).
        page: Playwright page used for live extraction.
        selectors_map: configuration dict as described above.
        html: raw page HTML used for the BeautifulSoup fallback.
        parent_extra: extra model fields inherited from an enclosing block.
    """
    # Fix: the old signature used a mutable default (``parent_extra={}``),
    # which is shared across calls; normalize from None instead.
    if parent_extra is None:
        parent_extra = {}

    for key, value in selectors_map.items():
        if key == "merge":
            continue

        if isinstance(value, dict) and value.get("merge"):
            # When skip_selectors prune the HTML, live Playwright extraction
            # would still see the removed nodes — force the bs4 path then.
            force_use_bs4 = False
            skip_selectors = value.get("skip_selectors", [])
            if skip_selectors and html:
                soup = BeautifulSoup(html, "lxml")
                total_removed = 0
                for selector in skip_selectors:
                    found = soup.select(selector)
                    removed = len(found)
                    for el in found:
                        el.decompose()
                    logger.info(f"[SKIP] Usunięto {removed} elementów dla selektora: {selector}")
                    total_removed += removed
                logger.info(f"[SKIP] Łącznie usunięto {total_removed} elementów w merge bloku: {key}")
                # Rebind so later keys also see the cleaned markup.
                html = str(soup)
                force_use_bs4 = True

            if "content_type" in value and "content_field" in value:
                merged_result = {}
                for subkey, subval in value.items():
                    if subkey in {"merge", "content_type", "content_field", "skip_selectors"}:
                        continue
                    selector = subval.get("selector")
                    playwright_action = subval.get("playwright_action", "text_content")
                    field_value = ""
                    if selector:
                        if force_use_bs4:
                            field_value = _soup_extract(html, selector, playwright_action)
                        else:
                            field_value = await get_value_by_action(page, selector, playwright_action)
                            if field_value is None:
                                field_value = _soup_extract(html, selector, playwright_action)
                    merged_result[subkey] = field_value

                # NOTE: the previous version also resolved the ContentType here
                # and never used it (an extra DB round-trip) — removed.
                merged_str = json.dumps(merged_result, ensure_ascii=False)
                setattr(obj, value["content_field"], merged_str)
                await sync_to_async(obj.save)()
            else:
                await process_selectors(obj, page, value, html, parent_extra)
            continue

        if isinstance(value, dict) and ("content_type" in value or "create_new" in value):
            await process_nested_selector_block(obj, page, value, html)
            continue

        selector = value.get("selector")
        content_type_id = value.get("content_type")
        content_field = value.get("content_field")
        playwright_action = value.get("playwright_action", "text_content")
        create_new = value.get("create_new", False)
        extra_fields = value.get("extra_fields", {}).copy()
        extra_fields.update(parent_extra)

        result_value = ""
        if selector:
            result_value = await get_value_by_action(page, selector, playwright_action)
            if result_value is None:
                # Unified on the lxml parser (the old code mixed "html.parser"
                # here with "lxml" in the merge branch).
                result_value = _soup_extract(html, selector, playwright_action)

        if content_type_id:
            ct = await sync_to_async(ContentType.objects.get_for_id)(content_type_id)
            model_class = ct.model_class()
            if create_new:
                fields = {content_field: result_value, **extra_fields}
                await sync_to_async(model_class.objects.create)(**fields)
            else:
                instance = await sync_to_async(model_class.objects.get)(pk=obj.id)
                setattr(instance, content_field, result_value)
                await sync_to_async(instance.save)()
        else:
            setattr(obj, content_field, result_value)

    await sync_to_async(obj.save)()


async def process_nested_selector_block(obj, page, config, html, parent_model_info=None):
    """Collect values for one nested selector block; optionally create a row.

    Non-reserved keys of *config* are resolved as follows:
      - dict with "selector": extracted via Playwright, falling back to
        BeautifulSoup on the provided *html*;
      - dict with "action": rendered as a Jinja2 template with ``obj`` in scope;
      - any other dict: recursed into as a nested block;
      - scalar: used verbatim.

    When the block declares ``create_new``, a model row of the configured
    content type is created from the collected values and *obj* is pointed at
    it via its generic-FK fields. Returns the created instance (or None).
    """
    model_info = {
        "content_type": config.get("content_type"),
        "create_new": config.get("create_new", False),
        "id": config.get("id"),
    }
    fallback_action = config.get("playwright_action", "text_content")
    reserved = {"content_type", "create_new", "id", "playwright_action"}
    collected = {}

    for field_name, spec in config.items():
        if field_name in reserved:
            continue
        if not isinstance(spec, dict):
            collected[field_name] = spec
            continue

        if "selector" in spec:
            action = spec.get("playwright_action", fallback_action)
            extracted = await get_value_by_action(page, spec["selector"], action)
            if extracted is None:
                node = BeautifulSoup(html, "html.parser").select_one(spec["selector"])
                if node is None:
                    extracted = ""
                elif action == "inner_html":
                    extracted = node.decode()
                else:
                    extracted = node.text.strip()
            collected[field_name] = extracted
        elif "action" in spec:
            collected[field_name] = jinja2.Template(spec["action"]).render(obj=obj)
        else:
            collected[field_name] = await process_nested_selector_block(
                obj, page, spec, html, model_info
            )

    instance = None
    if model_info["create_new"]:
        ct = await sync_to_async(ContentType.objects.get_for_id)(model_info["content_type"])
        model_class = ct.model_class()
        instance = await sync_to_async(model_class.objects.create)(**collected)
        obj.content_type_id = model_info["content_type"]
        obj.object_id = instance.pk
        await sync_to_async(obj.save)()

    return instance




"""
Wrapper na UniversalListingParser
---------------------------------
Zwraca gotową paczkę do serializacji:
    raw_text      – spłaszczony tekst
    image_links   – lista src grafik
    parse_data    – opis + słownik KV (bez duplikatów, bez śmieci)
"""

from html_agregator.utils.dynamic_parser import UniversalListingParser


def raw_data_cleaner(html: str) -> dict:
    """Run UniversalListingParser over *html* and bundle its output.

    Returns a dict with:
        raw_text    – every section line joined with newlines
        image_links – image src list collected by the parser
        parse_data  – {"description": ...} merged with the parser's key/value pairs
    """
    parser = UniversalListingParser(html)

    # Flatten all section lines into a single newline-separated string.
    all_lines = []
    for section in parser.raw_sections:
        all_lines.extend(section["lines"])

    # Description first, then every extracted key/value pair.
    parse_data = {"description": parser.description, **parser.key_value}

    return {
        "raw_text": "\n".join(all_lines),
        "image_links": parser.image_links,
        "parse_data": parse_data,
    }
