from django.core.management.base import BaseCommand
from bs4 import BeautifulSoup
from manual_agregator.parser.inactive import is_inactive
from extractly.models import SourceNetwork
import requests


# Built-in synthetic HTML fixtures for exercising "inactive listing" rules
# without hitting the network.
#   "banner"    — page with the portal's expired-ad alert element
#                 (matched via the data-cy='expired-ad-alert' attribute).
#   "text_only" — page with no marker element, only free text stating the
#                 offer was removed / is no longer available (Polish).
# NOTE: the HTML strings are runtime test data — do not reword them.
SAMPLES = {
    "banner": """
    <html><body>
      <div data-cy='expired-ad-alert'>Ogłoszenie wygasło</div>
    </body></html>
    """,
    "text_only": """
    <html><body>
      <div>Ta oferta została usunięta. Ogłoszenie jest już niedostępne.</div>
    </body></html>
    """,
}


class Command(BaseCommand):
    """Management command that exercises a SourceNetwork's "inactive" rules.

    The HTML under test comes from (in priority order):
      1. a live fetch of --url when --fetch is given,
      2. a local file given via --html-file,
      3. one of the built-in SAMPLES.
    The result of ``is_inactive`` (flag + reason) is printed to stdout.
    """

    help = "Test inactive rules for a given SourceNetwork.name using synthetic HTML and optional file."

    def add_arguments(self, parser):
        parser.add_argument("--source-name", default="Otodom", help="SourceNetwork.name to test (default: Otodom)")
        parser.add_argument("--sample", choices=["banner", "text_only"], default="banner", help="Which built-in sample to test")
        parser.add_argument("--html-file", help="Optional path to an HTML file to test instead of a sample")
        parser.add_argument("--url", help="Optional URL string to test and to set page.url")
        parser.add_argument("--fetch", action="store_true", help="If set, fetch HTML from --url and test against it")

    def handle(self, *args, **options):
        # argparse normalizes "--source-name" to the key "source_name";
        # a "source-name" key never exists in options.
        name = options.get("source_name") or "Otodom"
        sample = options.get("sample")
        html_file = options.get("html_file")
        # Keep the user-supplied URL separate from the effective page URL so
        # that "--fetch without --url" can still be detected and rejected.
        explicit_url = options.get("url")
        test_url = explicit_url or "https://otodom.pl/pl/oferta/przyklad"

        src = SourceNetwork.objects.filter(name__iexact=name).first()
        if not src or not getattr(src, "manual_data_source_fetcher", None):
            self.stderr.write(self.style.ERROR(f"Source '{name}' not found or has no SourceManual."))
            return
        manual = src.manual_data_source_fetcher
        rules = manual.inactive or []

        if options.get("fetch"):
            # Validate the raw option, not the defaulted test_url — otherwise
            # --fetch without --url would silently fetch the example URL.
            if not explicit_url:
                self.stderr.write(self.style.ERROR("--fetch requires --url"))
                return
            try:
                resp = requests.get(test_url, timeout=20, headers={
                    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/118.0 Safari/537.36"
                })
                # Do not raise on HTTP errors; use whatever body is available (e.g., 410/404 pages)
                html = resp.text or ""
            except Exception as e:
                self.stderr.write(self.style.ERROR(f"Failed to fetch URL: {e}"))
                return
        elif html_file:
            try:
                with open(html_file, "r", encoding="utf-8") as f:
                    html = f.read()
            except Exception as e:
                self.stderr.write(self.style.ERROR(f"Failed to read file: {e}"))
                return
        else:
            # --sample is constrained by argparse choices; the fallback to
            # "banner" is defensive only.
            html = SAMPLES.get(sample, SAMPLES["banner"]).strip()

        soup = BeautifulSoup(html, "html.parser")

        # Minimal stand-in for the page object expected by is_inactive;
        # only the .url attribute is consumed here.
        class _Page:
            url = test_url
        page = _Page()

        inactive, reason = is_inactive(html=html, soup=soup, rules=rules, page=page, extracted={})
        self.stdout.write(f"inactive={inactive}, reason={reason}")