import sys
import os
import asyncio
from pathlib import Path
from itertools import product
from urllib.parse import urljoin
from django.db import IntegrityError, transaction
from django.utils import timezone

# --- Force UTF-8 on stdout (important on Windows!) ---
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding="utf-8")
# ----------------------------------------------------

from asgiref.sync import sync_to_async

BASE_DIR = Path(__file__).resolve().parents[1]
if str(BASE_DIR) not in sys.path:
    sys.path.insert(0, str(BASE_DIR))
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "NetworkMonitoring.settings")

# Initialise the Django app registry before the model imports below
# (assumes this script is run standalone; django.setup() is harmless if the
# registry has already been populated elsewhere).
import django

django.setup()

from playwright.async_api import async_playwright
from extractly.models import (
    SourceNetwork,
    NetworkMonitoredPage,
    NetworkSourceError,
    RawMonitoredLink,
    NetworkStatus,
    Ads,
    AdsManual,
)
from link_agregator.utils.logger import logger
from link_agregator.utils.cookies import handle_cookies
from link_agregator.utils.next_page import paginate_next


async def log_source_error(source, message, error_type="general"):
    await sync_to_async(NetworkSourceError.objects.create)(
        source=source,
        error_message=message[:2048],
        error_type=error_type,
    )

# --- Permutations ---
def generate_permutations(structure, params):
    perm_keys = [k for k, v in params.items() if isinstance(v, dict)]
    static_keys = [k for k, v in params.items() if not isinstance(v, dict)]

    values = [list(params[k].keys()) for k in perm_keys]
    for combination in product(*values):
        url = structure
        mapping_for_db = {}
        for k, v in zip(perm_keys, combination):
            url = url.replace(f"{{{k}}}", v)
            mapping_for_db[k] = params[k][v]
        for k in static_keys:
            url = url.replace(f"{{{k}}}", params[k])
            mapping_for_db[k] = params[k]
        yield url, mapping_for_db
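# Illustration only (hypothetical structure/params, not taken from any real
# source config). Dict-valued params are permuted over their keys (the URL gets
# the key, mapping_for_db gets the value), plain strings are substituted as-is,
# and unknown placeholders such as {pagination} are left untouched for
# scrape_single_permutation to fill in later:
#
#   list(generate_permutations(
#       "/{estate_type}/{city}?{pagination}",
#       {"estate_type": "mieszkania", "city": {"gdansk": "Gdańsk", "sopot": "Sopot"}},
#   ))
#   # -> [("/mieszkania/gdansk?{pagination}", {"city": "Gdańsk", "estate_type": "mieszkania"}),
#   #     ("/mieszkania/sopot?{pagination}", {"city": "Sopot", "estate_type": "mieszkania"})]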


async def get_total_pages_from_pagination(page, pagination_numbers_selector):
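    """Return the highest page number parsed from the pagination elements, or 1 as a fallback."""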
    elements = await page.query_selector_all(pagination_numbers_selector)
    numbers = []
    for el in elements:
        try:
            txt = await el.inner_text()
            num = int(''.join(filter(str.isdigit, txt)))
            numbers.append(num)
        except Exception:
            continue
    if numbers:
        return max(numbers)
    return 1  # fallback: at least one page


async def extract_links_from_section(page, section_selector, url_attr, base_url):
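    """
    Collect listing URLs from the elements matched by section_selector.

    For <a> elements url_attr is read directly; for other elements the attribute
    is read from the element itself, then from nested <a> tags, and (only while
    nothing has been collected yet) from the fallback attributes data-url,
    data-href and href. Relative URLs are resolved against base_url.
    """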
    elements = await page.query_selector_all(section_selector)
    logger.info(f"[DEBUG] Znaleziono {len(elements)} sekcji przez selektor {section_selector}")
    links = []
    for idx, el in enumerate(elements):
        tag_name = await el.evaluate("(e) => e.tagName.toLowerCase()")
        if tag_name == "a":
            val = await el.get_attribute(url_attr)
            if val:
                logger.info(f"[DEBUG] Znalazłem atrybut {url_attr} w <a>: {val}")
                links.append(val if val.startswith("http") else urljoin(base_url, val))
        else:
            val = await el.get_attribute(url_attr)
            if not val:
                a_tags = await el.query_selector_all('a')
                for a_el in a_tags:
                    val2 = await a_el.get_attribute(url_attr)
                    if val2:
                        logger.info(f"[DEBUG] Znalazłem atrybut w <a>: {val2}")
                        links.append(val2 if val2.startswith("http") else urljoin(base_url, val2))
                if not links:
                    for attr in ["data-url", "data-href", "href"]:
                        fallback = await el.get_attribute(attr)
                        if fallback:
                            logger.info(f"[DEBUG] Znalazłem atrybut fallback {attr}: {fallback}")
                            links.append(fallback if fallback.startswith("http") else urljoin(base_url, fallback))
            else:
                logger.info(f"[DEBUG] Znalazłem atrybut {url_attr}: {val}")
                links.append(val if val.startswith("http") else urljoin(base_url, val))
        html = await el.inner_html()
        logger.debug(f"[HTML_SECTION {idx}] {html}")

    logger.info(f"[EXTRACTED] Wyszukano {len(links)} linków (selector: {section_selector} attr: {url_attr})")
    return links


# --- Cookie handling, once per domain ---
cookies_clicked_for_baseurl = set()

async def ensure_cookies_accepted_for_baseurl(page, base_url, ai_config):
    if base_url not in cookies_clicked_for_baseurl:
        await handle_cookies(page, ai_config)
        cookies_clicked_for_baseurl.add(base_url)
        logger.info(f"Cookies zaakceptowane dla domeny: {base_url}")
    else:
        logger.info(f"Cookies już zaakceptowane dla domeny: {base_url}")


# --- Sync <-> async helpers ---

@sync_to_async
def _clear_status_for_source(source):
    # Clear NetworkStatus rows for the given source
    NetworkStatus.objects.filter(source=source).delete()

@sync_to_async
def _cascade_deactivate(source, expired_urls, now):
    """
    Atomically deactivate:
      - NetworkMonitoredPage
      - Ads
      - AdsManual
    joined via OneToOne reverse: <Model>.objects.filter(networkmonitoredpage__...)
    Returns tuple of counts (pages, ads, ads_manual).
    """
    with transaction.atomic():
        # Pages
        pages_qs = NetworkMonitoredPage.objects.filter(
            source=source, url__in=expired_urls, is_active=True
        )
        pages_updated = pages_qs.update(
            # is_active=False,
            check_active=True,
            inactive_date=now
            )

        # Ads
        ads_updated = Ads.objects.filter(
            networkmonitoredpage__source=source,
            networkmonitoredpage__url__in=expired_urls,
            is_active=True,
        ).update(
            # is_active=False,
            check_active=True,
            inactive_date=now
            )
        
        # AdsManual
        ads_manual_updated = AdsManual.objects.filter(
            networkmonitoredpage__source=source,
            networkmonitoredpage__url__in=expired_urls,
            is_active=True,
        ).update(is_active=False, check_active=True, inactive_date=now)

        return pages_updated, ads_updated, ads_manual_updated



@sync_to_async
def _reactivate_return_counts(source, active_urls_now):
    """
    Jeśli ogłoszenie wróciło na listę, a w DB było nieaktywne:
      - NetworkMonitoredPage: is_active=True, inactive_date=None
      - Ads/AdsManual: is_active=True, inactive_date=None, isSendToMainServer=False
    Zwraca (pages_re, ads_re, ads_manual_re).
    """
    with transaction.atomic():
        pages_re = NetworkMonitoredPage.objects.filter(
            source=source, url__in=active_urls_now, is_active=False
        ).update(is_active=True, inactive_date=None)

        ads_re = Ads.objects.filter(
            networkmonitoredpage__source=source,
            networkmonitoredpage__url__in=active_urls_now,
            is_active=False,
        ).update(is_active=True, inactive_date=None, isSendToMainServer=False)

        ads_manual_re = AdsManual.objects.filter(
            networkmonitoredpage__source=source,
            networkmonitoredpage__url__in=active_urls_now,
            is_active=False,
        ).update(is_active=True, inactive_date=None, isSendToMainServer=False)

        return pages_re, ads_re, ads_manual_re


async def scrape_single_permutation(
    page, base_url, selectors, permutation_url,
    pagination_pattern, first_page_pagination, mapping_for_db, source, mode="fetch"
):
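    """
    Scrape a single permutation URL page by page.

    Configuration is read from `selectors` (keys used below: "section",
    "url_attr", "wait_for_wrapper", "pagination_strategy", "next_selector",
    "page_url_template", "total_pages_selector"); moving between pages is
    delegated to paginate_next. In mode="fetch", RawMonitoredLink and
    NetworkMonitoredPage rows are created for every extracted link; in
    mode="check", the links are buffered in NetworkStatus for the status
    reconciliation performed in scrape_all_sources.
    """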
    section_selector = selectors.get("section")
    url_attr = selectors.get("url_attr", "href")
    wrapper_selector = selectors.get("wait_for_wrapper", ".lista_ofert")
    ai_config = selectors
    pagination_strategy = selectors.get("pagination_strategy", "url_change")
    next_selector = selectors.get("next_selector")
    page_url_template = selectors.get("page_url_template", "?page={{number}}")
    pagination_numbers_selector = selectors.get("total_pages_selector")
    page_num = 1

    consecutive_duplicates = 0
    DUPLICATE_LIMIT = 400

    last_links_set = None
    duplicate_pages_in_a_row = 0
    MAX_DUPLICATE_PAGES = 2

    total_pages = None
    first_page = True

    while True:
        # --- 1. Navigate to the first page
        if page_num == 1:
            url = permutation_url.replace("{pagination}", first_page_pagination)
            url = urljoin(base_url, url)
            if "{pagination}" in url:
                logger.warning(f"[ZABEZPIECZENIE] Próbowałem wejść na niepodstawiony link z '{{pagination}}': {url} — przerywam permutację!")
                await log_source_error(
                    source,
                    f"Próba wejścia na niepodstawiony link z '{{pagination}}': {url} (permutacja: {permutation_url})",
                    error_type="bad_pagination_link"
                )
                break
            try:
                await page.goto(url, wait_until="domcontentloaded", timeout=60000, referer=base_url)
                logger.info(f"[PAGE] Strona {page_num} — {url}")
            except Exception as e:
                logger.error(f"[GOTO PERM] Nie udało się otworzyć {url}: {e}")
                await log_source_error(source, f"GOTO PERM ERROR: {url}: {e}", error_type="permutation_goto_failed")
                break

            logger.info(f"[PAGE] Strona {page_num} — {url}")

        # --- 2. Determine the total number of pages (once, on page 1)
        if total_pages is None and pagination_numbers_selector and page_num == 1:
            try:
                total_pages = await get_total_pages_from_pagination(page, pagination_numbers_selector)
                logger.info(f"[PAGINACJA] Wyznaczono total_pages={total_pages} na podstawie {pagination_numbers_selector}")
            except Exception as e:
                logger.warning(f"[PAGINACJA] Nie udało się wyznaczyć liczby stron: {e}")
                total_pages = None

        # --- 3. Accept cookies
        await ensure_cookies_accepted_for_baseurl(page, base_url, ai_config)
        try:
            await page.wait_for_selector('.cookie-modal, .cookies', state='detached', timeout=45000)
        except Exception:
            pass

        try:
            await page.wait_for_selector(wrapper_selector, timeout=45000)
        except Exception as exc:
            logger.warning(f"[WARN] Brak ogłoszeń lub timeout selector: {exc} — przerywam.")
            break

        # --- 4. Extract links
        full_urls = await extract_links_from_section(page, section_selector, url_attr, base_url)
        logger.info(f"[FOUND] Znaleziono {len(full_urls)} ogłoszeń.")

        links_set = set(full_urls)
        if last_links_set is not None and links_set == last_links_set:
            duplicate_pages_in_a_row += 1
            logger.warning(f"[DUPLICATE PAGE] Strona {page_num} zawiera te same linki co poprzednia ({duplicate_pages_in_a_row} raz/y pod rząd).")
            if duplicate_pages_in_a_row >= MAX_DUPLICATE_PAGES:
                logger.warning(f"[END] Strona zduplikowana {MAX_DUPLICATE_PAGES} razy pod rząd – przerywamy permutację.")
                break
        else:
            duplicate_pages_in_a_row = 0
        last_links_set = links_set

        # --- 5. Save listings to the DB, including meta["total_pages"]
        for ad_url in full_urls:
            full_url = ad_url if ad_url.startswith(base_url) else urljoin(base_url, ad_url)
            meta = {
                "base_url": base_url,
                "params": mapping_for_db,
                "selectors": selectors,
                "page_num": page_num,
                "scraped_url": page.url,  # current page URL (the local 'url' variable is only set for page 1)
                "total_pages": total_pages
            }
            if "city" in mapping_for_db:
                meta["city"] = mapping_for_db["city"]
            if "estate_type" in mapping_for_db:
                meta["estate_type"] = mapping_for_db["estate_type"]
            if "offer_type" in mapping_for_db:
                meta["offer_type"] = mapping_for_db["offer_type"]

            if mode == "fetch":
                try:
                    await sync_to_async(RawMonitoredLink.objects.create)(
                        url=full_url,
                        source=source,
                    )
                except Exception as exc:
                    logger.warning(f"[RAW-LINK-ERROR] {full_url}: {exc}")
                try:
                    await sync_to_async(NetworkMonitoredPage.objects.create)(
                        url=full_url,
                        estate_type=mapping_for_db.get("estate_type"),
                        offer_type=mapping_for_db.get("offer_type"),
                        source=source,
                        name=source.name,
                        meta=meta
                    )
                    consecutive_duplicates = 0
                except IntegrityError:
                    consecutive_duplicates += 1
                    logger.info(f"[DUPLICATE] {full_url} (duplikatów pod rząd: {consecutive_duplicates})")
                except Exception as exc:
                    logger.error(f"[ERROR] Problem z zapisem {full_url}: {exc}")

            elif mode == "check":
                try:
                    await sync_to_async(NetworkStatus.objects.get_or_create)(
                        url=full_url,
                        source=source,
                    )
                except Exception as exc:
                    logger.warning(f"[NETWORKSTATUS-ERROR] {full_url}: {exc}")

        # --- 6. Stop early when too few listings were found
        skip_limit = getattr(source, "skip_when_less", None)
        if skip_limit is not None and len(full_urls) < skip_limit:
            logger.warning(f"[SKIP] Znaleziono tylko {len(full_urls)} linków (limit: {skip_limit}) — przerywamy tę kategorię/permutację.")
            break

        if not full_urls:
            logger.warning(f"[NO LINKS] Brak linków na stronie {url} (selector: {section_selector}, attr: {url_attr}) — przerywam paginację.")
            await log_source_error(
                source,
                f"No links found on {url} (selector: {section_selector}, attr: {url_attr})",
                error_type="no_links"
            )
            page_source = await page.content()
            with open(f'debug_page_{page_num}.html', 'w', encoding='utf-8') as f:
                f.write(page_source)
            break

        if consecutive_duplicates >= DUPLICATE_LIMIT:
            logger.warning(f"[END] Przekroczono limit {DUPLICATE_LIMIT} kolejnych duplikatów — przerywam paginację dla tej permutacji.")
            break

        # --- 7. Stop once total_pages has been reached
        if total_pages is not None and page_num >= total_pages:
            logger.info(f"[END] Osiągnięto ostatnią stronę: {total_pages}, przerywam permutację.")
            break

        # --- 8. Pagination, handled exclusively by paginate_next
        should_continue = await paginate_next(
            page=page,
            selectors=selectors,
            pagination_strategy=pagination_strategy,
            page_num=page_num,
            base_url=base_url,
            permutation_url=permutation_url,
            pagination_pattern=pagination_pattern,
            first_page_pagination=first_page_pagination,
            section_selector=section_selector,
            next_selector=next_selector
        )
        if not should_continue:
            break
        page_num += 1

    logger.info(f"[END] Ukończono: {permutation_url}")


async def scrape_all_sources(source_id=None, params=None, mode="fetch", headless=False):
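    """
    Entry point: iterate over enabled SourceNetwork entries with a single shared
    Playwright browser context, accept cookies once per base_url, scrape every
    parameter permutation, and in mode="check" reconcile statuses afterwards
    (mark expired listings via _cascade_deactivate, reactivate returned ones,
    add missing ones).
    """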
    if source_id:
        sources = await sync_to_async(list)(SourceNetwork.objects.filter(id=source_id, enabled=True))
    else:
        sources = await sync_to_async(list)(SourceNetwork.objects.filter(enabled=True))

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=headless)
        context = await browser.new_context(
            user_agent="Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115 Safari/537.36",
            locale="pl-PL",
            viewport={"width": 1280, "height": 800}
        )
        page = await context.new_page()

        logger.info(f"[SRC] Liczba źródeł do przetworzenia: {len(sources)}")
        logger.info(f"[SRC] Źródła: {[s.title for s in sources]}")

        for source in sources:
            if mode == "check":
                # Clear the status buffer at the start of each portal (safe: wrapped with sync_to_async)
                await _clear_status_for_source(source)

            base_url = source.base_url
            structure = source.structure
            source_params = source.params.copy()
            selectors = source.selectors.copy()
            pagination_pattern = source.pagination
            first_page_pagination = getattr(source, 'first_page_pagination', None) or selectors.get("first_page_pagination") or ""

            if params:
                if "base_url" in params:
                    base_url = params["base_url"]
                if "structure" in params:
                    structure = params["structure"]
                if "params" in params:
                    source_params.update(params["params"])
                if "selectors" in params:
                    selectors.update(params["selectors"])
                if "pagination" in params:
                    pagination_pattern = params["pagination"]
                if "first_page_pagination" in params:
                    first_page_pagination = params["first_page_pagination"]

            permutations = list(generate_permutations(structure, source_params))
            logger.info(f"[PERMUTACJE] {source.title} — {len(permutations)} kombinacji")
            logger.info(f"[PERMUTACJE-LIST] Permutacje dla {source.title}:")
            for perm_url, mapping_for_db in permutations:
                logger.info(f"   -> URL: {perm_url} | MAP: {mapping_for_db}")

            # --- Accept cookies only once per domain ---
            logger.info(f"[GOTO] Próba wejścia na {base_url}")
            try:
                if page.is_closed():
                    logger.warning(f"[PAGE CLOSED] Strona zamknięta – tworzę nową stronę")
                    page = await context.new_page()
                    await asyncio.sleep(0.5)

                await asyncio.sleep(0.5)  # give the page a moment to settle
                await page.goto(base_url, wait_until="domcontentloaded", timeout=60000)
                logger.info(f"[GOTO OK] Weszło na {base_url}")
            except Exception as e:
                logger.error(f"[GOTO ERROR] Nie udało się otworzyć base_url: {base_url}: {e}")

                # Take a screenshot on error
                try:
                    await page.screenshot(path=f"errors/goto_error_{source.id}.png")
                    logger.info("Zrobiono screenshot błędu.")
                except Exception:
                    logger.warning("Nie udało się zrobić screenshota.")

                await log_source_error(source, f"GOTO ERROR for base_url: {base_url}: {e}", error_type="goto_failed")
                continue

            logger.info(f"[COOKIES] Otwieram base_url: {base_url} i akceptuję cookies (tylko raz dla tej domeny).")
            await ensure_cookies_accepted_for_baseurl(page, base_url, selectors)
            try:
                await page.wait_for_selector('.cookie-modal, .cookies', state='detached', timeout=45000)
            except Exception:
                pass

            # --- Iterate over the permutations ---
            for permutation_url, mapping_for_db in permutations:
                await scrape_single_permutation(
                    page,
                    base_url,
                    selectors,
                    permutation_url,
                    pagination_pattern,
                    first_page_pagination,
                    mapping_for_db,
                    source,
                    mode=mode,
                )

            # --- After all permutations: SQL status check ---
            if mode == "check":
                # 1. Links currently in the DB (active)
                db_links_qs = NetworkMonitoredPage.objects.filter(is_active=True, source=source)
                db_links = set(await sync_to_async(list)(db_links_qs.values_list("url", flat=True)))

                # 2. Links collected into NetworkStatus during this run
                status_links_qs = NetworkStatus.objects.filter(source=source)
                status_links = set(await sync_to_async(list)(status_links_qs.values_list("url", flat=True)))

                # 3. Expired links, to be deactivated
                expired = db_links - status_links
                now = timezone.now()
                pages_upd, ads_upd, ads_manual_upd = await _cascade_deactivate(source, expired, now)
                logger.info(
                    f"Oznaczono jako nieaktywne {len(expired)} ogłoszeń (status check dla {source.title}) "
                    f"[pages:{pages_upd}, ads:{ads_upd}, ads_manual:{ads_manual_upd}]"
                )

                # 3.5 "Reactivated" links: present on the list now but is_active=False in the DB
                #     (i.e. they returned to the portal / are available again)
                pages_re, ads_re, ads_manual_re = await _reactivate_return_counts(source, status_links)
                if any((pages_re, ads_re, ads_manual_re)):
                    logger.info(
                        f"Reaktywowane ogłoszenia (is_active=True, isSendToMainServer=False): "
                        f"[pages:{pages_re}, ads:{ads_re}, ads_manual:{ads_manual_re}]"
                    )

                # 4. Missing links: present on the site but absent from the DB
                missing = status_links - db_links
                added = 0
                for url in missing:
                    try:
                        await sync_to_async(NetworkMonitoredPage.objects.create)(
                            url=url,
                            source=source,
                            name=source.name,
                            is_active=True,
                            meta={"note": "Dodano przez status-check"}
                        )
                        added += 1
                    except Exception as e:
                        logger.warning(f"[CHECKER-ADD-ERROR] {url}: {e}")
                logger.info(f"Dodano {added} nowych ogłoszeń (brakujących w bazie) podczas status check")


        await browser.close()
        logger.info("=== Agregator zakończył pracę, browser zamknięty ===")
