# ########### html_agregator/utils/html_run.py ###########
import sys
import os
import asyncio
from pathlib import Path

# --- Force UTF-8 on stdout (important on Windows!) ---
# The hasattr guard skips stdout objects that do not provide reconfigure().
if hasattr(sys.stdout, "reconfigure"):
    sys.stdout.reconfigure(encoding="utf-8")
# ----------------------------------------------------

# Put BASE_DIR at the front of sys.path (once) so imports resolve against it.
# parents[0] is this file's directory, so parents[1] is the directory above it.
# NOTE(review): if this file lives at html_agregator/utils/html_run.py, BASE_DIR
# is html_agregator/ itself rather than the directory that contains the
# html_agregator, extractly and NetworkMonitoring packages - confirm that
# parents[1] (and not parents[2]) is intended.
BASE_DIR = Path(__file__).resolve().parents[1]
if str(BASE_DIR) not in sys.path:
    sys.path.insert(0, str(BASE_DIR))

# setdefault keeps a DJANGO_SETTINGS_MODULE that is already present in the
# environment and only supplies this value when none is set.
os.environ.setdefault("DJANGO_SETTINGS_MODULE", "NetworkMonitoring.settings")

# Django is configured here, ahead of the model/project imports that follow,
# which is why those imports are placed after this point and carry E402 waivers.
import django  # noqa: E402
django.setup()

from asgiref.sync import sync_to_async  # noqa: E402
from playwright.async_api import async_playwright  # noqa: E402
from extractly.models import NetworkMonitoredPage  # noqa: E402
from html_agregator.html_fetcher import fetch_and_save_html_for_pages  # noqa: E402
from html_agregator.utils.process import get_pages_to_process  # noqa: E402


async def process_network_monitored_pages(
    headless: bool = False,
    id_: int | None = None,
    enable: str = "true",
    names: list[str] | None = None,
    include_fetched: bool = False,
) -> None:
    """Fetch and save HTML for the selected NetworkMonitoredPage records.

    Either a single record is loaded by primary key (``id_``), or the list of
    records is obtained from ``get_pages_to_process`` using the remaining
    filters. A Chromium browser is then started through Playwright and one
    page object is handed to ``fetch_and_save_html_for_pages`` together with
    the selected records.

    Args:
        headless: Passed to ``chromium.launch``; ``True`` runs without a UI.
        id_: Primary key of a single record to process. When it is not
            ``None`` the other filters are ignored.
        enable: Forwarded unchanged to ``get_pages_to_process``.
        names: Forwarded to ``get_pages_to_process`` as its ``name`` argument.
        include_fetched: Forwarded unchanged to ``get_pages_to_process``.

    Returns:
        None. If ``id_`` is given and no such record exists, a message is
        printed and the function returns without starting a browser.
    """
    # Build the list of pages to fetch (or a single record looked up by ID).
    # Compare against None explicitly: a plain truthiness test would treat
    # id_=0 as "no id given" and silently process every matching page.
    if id_ is not None:
        try:
            obj = await sync_to_async(NetworkMonitoredPage.objects.get)(id=id_)
        except NetworkMonitoredPage.DoesNotExist:
            print(f"Brak NetworkMonitoredPage o id={id_}")
            return
        pages = [obj]
    else:
        pages_qs = await get_pages_to_process(
            enable=enable,
            name=names,
            include_fetched=include_fetched,
        )
        # Materialise the result off the event loop via sync_to_async.
        pages = await sync_to_async(list)(pages_qs)
        print(f"Do pobrania: {len(pages)} stron")

    # Simple "unmasking" of automation: script injected into every page of the
    # browser context before the page's own scripts run.
    stealth_script = """
        Object.defineProperty(Navigator.prototype, 'webdriver', { get: () => undefined });
        window.chrome = window.chrome || { runtime: {} };
        Object.defineProperty(navigator, 'languages', { get: () => ['pl-PL','pl'] });
        Object.defineProperty(navigator, 'plugins',  { get: () => [1,2,3,4,5] });
        """

    async with async_playwright() as p:
        # Stealth + stability flags for headless runs.
        browser = await p.chromium.launch(
            headless=headless,
            args=[
                "--disable-blink-features=AutomationControlled",
                "--no-sandbox",
                "--disable-dev-shm-usage",
            ],
        )
        try:
            context = await browser.new_context(
                user_agent=(
                    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 "
                    "(KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36"
                ),
                locale="pl-PL",
                timezone_id="Europe/Warsaw",
                viewport={"width": 1366, "height": 768},
                extra_http_headers={
                    "Accept-Language": "pl-PL,pl;q=0.9,en;q=0.8",
                    "Upgrade-Insecure-Requests": "1",
                },
            )
            await context.add_init_script(stealth_script)

            page = await context.new_page()

            # NOTE: the fetcher receives a Page, not the Browser.
            await fetch_and_save_html_for_pages(page, pages=pages)
        finally:
            # Close the browser even when context setup or fetching raises.
            await browser.close()


if __name__ == "__main__":
    import argparse

    # Command-line front end: every option maps onto one keyword argument of
    # process_network_monitored_pages().
    cli = argparse.ArgumentParser(
        description="Runner dla HTML agregatora (Playwright).",
    )
    cli.add_argument(
        "--headless",
        action="store_true",
        default=False,
        help="Uruchom bez UI (headless)",
    )
    cli.add_argument(
        "--id",
        type=int,
        help="Pobierz tylko jeden NetworkMonitoredPage po ID",
    )
    cli.add_argument(
        "--enable",
        choices=["true", "false", "all"],
        default="true",
        help="Filtr enabled",
    )
    cli.add_argument(
        "--name",
        nargs="*",
        help="Filtr po nazwie źródła (case-insensitive, można kilka)",
    )
    cli.add_argument(
        "--include-fetched",
        action="store_true",
        default=False,
        help="Uwzględnij już przetworzone",
    )

    opts = cli.parse_args()

    # Translate CLI attribute names into the coroutine's parameter names.
    run_kwargs = {
        "headless": opts.headless,
        "id_": opts.id,
        "enable": opts.enable,
        "names": opts.name,
        "include_fetched": opts.include_fetched,
    }
    asyncio.run(process_network_monitored_pages(**run_kwargs))
