# extractly/management/commands/link_existing_manuals.py
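#
# Example invocations (a sketch; the command name is derived from this file's
# path under extractly/management/commands/, and the filter values below are
# placeholders):
#
#   python manage.py link_existing_manuals --dry-run --limit 500 --verbose-urls
#   python manage.py link_existing_manuals --icontains "shop" --batch 100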
from django.core.management.base import BaseCommand
from django.db import transaction
from django.db.models import Exists, OuterRef, Q
from django.utils.timezone import now

from extractly.models import NetworkMonitoredPage, AdsManual


class Command(BaseCommand):
    help = (
        "Links NetworkMonitoredPage.network_ad_manual to existing AdsManual by URL, "
        "for pages that are already parsed but not linked yet."
    )

    def add_arguments(self, parser):
        parser.add_argument("--limit", type=int, default=1000,
                            help="Max pages to process this run (default: 1000).")
        parser.add_argument("--batch", type=int, default=200,
                            help="DB save batch size (default: 200).")
        parser.add_argument("--name", help="Optional exact name filter (NetworkMonitoredPage.name).")
        parser.add_argument("--icontains", help="Optional case-insensitive substring match on name.")
        parser.add_argument("--only-id", type=int, help="Process only a single page ID.")
        parser.add_argument("--dry-run", action="store_true", help="Don't write changes; just show what would be done.")
        parser.add_argument("--verbose-urls", action="store_true", help="Print every linked URL.")

    def handle(self, *args, **opts):
        limit = int(opts["limit"])
        batch = max(1, int(opts["batch"]))
        only_id = opts.get("only_id")
        dry_run = bool(opts.get("dry_run"))
        verbose_urls = bool(opts.get("verbose_urls"))

        # Base queryset: pages with no link yet and a non-blank URL (NULL and whitespace-only excluded)
        base_qs = NetworkMonitoredPage.objects.filter(
            network_ad_manual__isnull=True
        ).exclude(
            Q(url__isnull=True) | Q(url__regex=r"^\s*$")
        )

        if only_id:
            base_qs = base_qs.filter(id=only_id)

        name = opts.get("name")
        if name:
            base_qs = base_qs.filter(name=name)

        icontains = opts.get("icontains")
        if icontains:
            base_qs = base_qs.filter(name__icontains=icontains)

        # Narrow the set up front: keep only pages for which an AdsManual with the same URL EXISTS
        ads_exists = AdsManual.objects.filter(url=OuterRef("url"))
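        # Exists() compiles to a correlated "WHERE EXISTS (SELECT ... WHERE url = <page>.url)"
        # subquery, so candidate filtering happens in a single DB pass instead of
        # one lookup per unlinked page from Python.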
        qs = (
            base_qs
            .annotate(_has_ad=Exists(ads_exists))
            .filter(_has_ad=True)
            .order_by("id")
        )[:limit]
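        # Note: the slice caps the queryset, so the count() and iteration below see
        # at most `limit` rows; order_by("id") keeps repeated runs deterministic.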

        total_candidates = qs.count()
        if total_candidates == 0:
            self.stdout.write(self.style.WARNING("Nothing to link: no candidates found."))
            return

        self.stdout.write(
            f"[{now().isoformat()}] Found {total_candidates} page(s) to link "
            f"(limit={limit}, batch={batch}, dry_run={dry_run})"
        )

        linked = 0
        processed = 0

        # Walk the candidates in small chunks, each inside a transaction with a row
        # lock held for the duration of the save, to avoid races with concurrent workers.
        page_ids = list(qs.values_list("id", flat=True))
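        # Materialize the candidate IDs once up front; each chunk is then re-fetched
        # fresh, so pages deleted in the meantime simply drop out of the batch.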
        for i in range(0, len(page_ids), batch):
            chunk_ids = page_ids[i:i + batch]
            chunk = list(NetworkMonitoredPage.objects.filter(id__in=chunk_ids).order_by("id"))

            with transaction.atomic():
                for page in chunk:
                    processed += 1
                    url = (page.url or "").strip()
                    if not url:
                        continue

                    try:
                        ad = AdsManual.objects.get(url=url)
                    except AdsManual.DoesNotExist:
                        continue  # the manual may have been deleted in the meantime
                    except AdsManual.MultipleObjectsReturned:
                        # duplicate URLs in AdsManual would crash .get(); fall back to the oldest row
                        ad = AdsManual.objects.filter(url=url).order_by("id").first()

                    if verbose_urls:
                        self.stdout.write(f"  link: page#{page.id} → ad#{ad.id}  {url}")

                    if not dry_run:
                        # guard against races: lock the row before writing
                        page = NetworkMonitoredPage.objects.select_for_update().get(pk=page.pk)
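                        # (select_for_update() requires the enclosing atomic block;
                        # the lock is released when that transaction commits.)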
                        if page.network_ad_manual_id != ad.id:
                            page.network_ad_manual = ad
                            page.save(update_fields=["network_ad_manual"])
                            linked += 1

            self.stdout.write(f"Processed {processed}/{total_candidates}… linked so far: {linked}")

        summary = f"Done. Processed={processed}, Linked={linked}, Skipped={processed - linked}"
        if dry_run:
            summary += " (dry-run)"
        self.stdout.write(self.style.SUCCESS(summary))
