from django.core.management.base import BaseCommand
from django.utils.timezone import now
from django.db import transaction
from bs4 import BeautifulSoup
from urllib.parse import urlparse
import requests

from extractly.models import NetworkMonitoredPage, SourceNetwork
from manual_agregator.parser.inactive import is_inactive


def _canon_url(u: str) -> str:
    try:
        p = urlparse(u)
        host = p.netloc
        if host.startswith("www."):
            host = host[4:]
        return f"{p.scheme}://{host}{p.path}".rstrip("/")
    except Exception:
        return (u or "").rstrip("/")


class Command(BaseCommand):
    """Re-check one listing URL against its source's "inactive" rules.

    Fetches the live HTML for the page, evaluates the attached manual's
    inactive rules via ``is_inactive``, and — unless ``--dry-run`` is
    given — marks the matching ``NetworkMonitoredPage`` (and its related
    ``AdsManual``, if present) as inactive.
    """

    help = "Fetch live HTML for a specific URL, evaluate inactive rules, and update NetworkMonitoredPage and AdsManual if inactive."

    def add_arguments(self, parser):
        """Register CLI options for this command."""
        parser.add_argument("--url", required=True, help="Listing URL to recheck")
        parser.add_argument("--source-name", default="Otodom", help="SourceNetwork.name (default: Otodom)")
        parser.add_argument("--timeout", type=int, default=25, help="HTTP timeout seconds (default 25)")
        parser.add_argument("--dry-run", action="store_true", help="Only print status, do not update DB")

    def handle(self, *args, **opts):
        url = opts.get("url")
        # argparse normalizes "--source-name" to the "source_name" key, so
        # that is the only key that can appear in opts (the old fallback
        # lookup of "source-name" was dead code and has been removed).
        src_name = opts.get("source_name") or "Otodom"
        dry_run = bool(opts.get("dry_run"))

        # Resolve the source network by (case-insensitive) name.
        src = SourceNetwork.objects.filter(name__iexact=src_name).first()
        if not src:
            self.stderr.write(self.style.ERROR(f"SourceNetwork '{src_name}' not found"))
            return

        # Locate the monitored page. First try matching on the listing
        # slug after "/pl/oferta/" (tolerates www / scheme differences);
        # if the URL has no such segment, split()[-1] yields the whole
        # normalized URL and we fall back to a plain contains match.
        u_norm = _canon_url(url)
        candidates = NetworkMonitoredPage.objects.filter(source=src, url__icontains=u_norm.split("/pl/oferta/")[-1])
        page = candidates.order_by("-id").first()
        if not page:
            page = NetworkMonitoredPage.objects.filter(source=src, url__icontains=u_norm).order_by("-id").first()

        if not page:
            self.stderr.write(self.style.ERROR("No NetworkMonitoredPage found for this URL (by suffix/contains)."))
            return

        self.stdout.write(f"Found page id={page.id} url={page.url}")

        # Load the inactive-detection rules from the source's manual.
        manual = getattr(src, "manual_data_source_fetcher", None)
        if not manual:
            self.stderr.write(self.style.ERROR("SourceManual not attached to this SourceNetwork."))
            return
        rules = manual.inactive or []

        # Fetch live HTML. A desktop User-Agent is sent because some
        # portals serve different (or blocked) markup to generic clients.
        # NOTE(review): the HTTP status is not checked — a 404/410 or an
        # anti-bot 403 error page is parsed like a normal response.
        # Confirm the inactive rules are meant to handle error pages too.
        try:
            resp = requests.get(page.url, timeout=int(opts.get("timeout") or 25), headers={
                "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118 Safari/537.36"
            })
            html = resp.text or ""
        except Exception as e:
            self.stderr.write(self.style.ERROR(f"Fetch failed: {e}"))
            return

        # Build soup once and evaluate the rules.
        soup = BeautifulSoup(html, "html.parser")
        inactive, reason = is_inactive(html=html, soup=soup, rules=rules, page=page, extracted={})
        self.stdout.write(f"inactive={inactive}, reason={reason}")

        if not inactive:
            self.stdout.write(self.style.WARNING("Rules did not mark this listing as inactive."))
            return

        if dry_run:
            self.stdout.write(self.style.SUCCESS("DRY RUN: would update page and AdsManual to is_active=False"))
            return

        # Update the page and its related ad atomically so a partial
        # failure cannot leave the two out of sync.
        with transaction.atomic():
            page.is_active = False
            page.inactive_date = now()
            page.meta = page.meta or {}
            page.meta["inactive_reason"] = reason
            page.save(update_fields=["is_active", "inactive_date", "meta"])

            ad = getattr(page, "network_ad_manual", None)
            if ad is not None:
                # Mirror the inactive state onto AdsManual; hasattr guards
                # keep this tolerant of schema differences across models.
                dirty = False
                if hasattr(ad, "is_active"):
                    ad.is_active = False
                    dirty = True
                if hasattr(ad, "inactive_date"):
                    ad.inactive_date = now()
                    dirty = True
                if hasattr(ad, "inactive_reason"):
                    try:
                        setattr(ad, "inactive_reason", reason)
                        dirty = True
                    except Exception:
                        # Best-effort: a property without a setter should
                        # not abort the transaction.
                        pass
                if dirty:
                    ad.save()

        self.stdout.write(self.style.SUCCESS("Updated page and AdsManual to inactive."))
