# -*- coding: utf-8 -*-
# your_app/management/commands/clean_owned_images.py

from django.core.management.base import BaseCommand
from django.db import connection
from django.db.models import Q, F, CharField, Value
from django.db.models.functions import Cast
from django.apps import apps
from django.db.models.expressions import Func

import itertools

# Postgres-only helper (gdy dostępny)
try:
    from django.contrib.postgres.fields.jsonb import KeyTextTransform
except Exception:
    KeyTextTransform = None

DEFAULT_ALLOWED_PREFIXES = [
    "https://extractly.s3.waw.io.cloud.ovh.net",
    # dodaj ewentualne CDN-y / aliasy:
    # "https://cdn.extractly.cloud/",
    # "https://images.hously.cloud/",
]


class JsonbScalarText(Func):
    """
    Ujednolicone wydobycie tekstu ze skalaru JSON:
    TRIM(CAST(expr AS TEXT)) — usuwa otaczające cudzysłowy (np. '"https://...jpg"').
    """
    template = "trim(both '\"' from CAST(%(expressions)s AS text))"
    output_field = CharField()


def sqlite_json_extract(expr, path: str):
    """
    SQLite JSON1: json_extract(expr, '$.main') / '$[0]'
    Zwraca TEXT bez dodatkowych cudzysłowów.
    """
    return Func(expr, Value(path), function="json_extract", output_field=CharField())


def mysql_json_extract_unquote(expr, path: str):
    """
    MySQL: JSON_UNQUOTE(JSON_EXTRACT(expr, '$.main')) / '$[0]'
    """
    raw = Func(expr, Value(path), function="JSON_EXTRACT", output_field=CharField())
    return Func(raw, function="JSON_UNQUOTE", output_field=CharField())


class Command(BaseCommand):
    help = (
        "Czyści pole 'images' z wartości niepochodzących z naszego storage.\n"
        "FAST+smart: obsługa string, dict['main'], array[0] dla Postgres/SQLite/MySQL.\n"
        "W trybie --deep filtruje listy/dicty po stronie Pythona i zostawia tylko nasze URL-e."
    )

    def add_arguments(self, parser):
        parser.add_argument("--model", default="extractly.AdsManual",
                            help="app_label.ModelName (domyślnie: extractly.AdsManual)")
        parser.add_argument("--field", default="images",
                            help="Nazwa pola z obrazkami (domyślnie: images)")
        parser.add_argument("--allow-prefix", action="append", dest="allowed_prefixes",
                            help="Dozwolony prefiks URL (można podać wiele razy). Jeśli brak, użyje domyślnych.")
        parser.add_argument("--dry-run", action="store_true",
                            help="Tylko pokaż liczby, nie zapisuj zmian.")
        parser.add_argument("--deep", action="store_true",
                            help="Analiza JSON (list/dict) po stronie Pythona; zostawia tylko nasze URL-e.")
        parser.add_argument("--batch-size", type=int, default=2000,
                            help="Wielkość batcha dla trybu --deep (domyślnie 2000).")
        parser.add_argument("--set-empty", action="store_true",
                            help="W --deep: zamiast NULL ustaw pustą listę [].")

    def handle(self, *args, **opts):
        model_label = opts["model"]
        field_name = opts["field"]
        dry = opts["dry_run"]
        deep = opts["deep"]
        batch_size = opts["batch_size"]
        set_empty = opts["set_empty"]

        allowed = opts.get("allowed_prefixes") or DEFAULT_ALLOWED_PREFIXES
        # normalizuj na końcowy slash
        allowed = [p if p.endswith("/") else p + "/" for p in allowed]

        # Model
        try:
            Model = apps.get_model(model_label)
        except LookupError:
            self.stderr.write(self.style.ERROR(f"Model '{model_label}' nie istnieje."))
            return

        # Pole
        if field_name not in {f.name for f in Model._meta.get_fields()}:
            self.stderr.write(self.style.ERROR(f"Pole '{field_name}' nie istnieje w modelu {model_label}."))
            return

        vendor = connection.vendor  # 'postgresql' | 'sqlite' | 'mysql' | ...
        # self.stdout.write(f"[INFO] DB backend: {vendor}")

        # -------------------------- FAST + smart ------------------------------
        if not deep:
            qs = Model.objects.filter(**{f"{field_name}__isnull": False})

            # 1) skalar-string: TRIM(CAST(json AS TEXT))
            qs = qs.annotate(_img_scalar=JsonbScalarText(F(field_name)))
            cond_scalar = Q()
            for p in allowed:
                cond_scalar |= Q(_img_scalar__startswith=p)

            # 2) dict['main'] i 3) array[0] — zależnie od backendu
            cond_main = Q()
            cond_arr0 = Q()

            if vendor == "postgresql" and KeyTextTransform is not None:
                qs = qs.annotate(
                    _img_main=KeyTextTransform("main", field_name),  # images->>'main'
                    _img_arr0=KeyTextTransform("0", field_name),     # images->>0
                )
                for p in allowed:
                    cond_main |= Q(_img_main__startswith=p)
                    cond_arr0 |= Q(_img_arr0__startswith=p)

            elif vendor == "sqlite":
                qs = qs.annotate(
                    _img_main=sqlite_json_extract(F(field_name), "$.main"),
                    _img_arr0=sqlite_json_extract(F(field_name), "$[0]"),
                )
                for p in allowed:
                    cond_main |= Q(_img_main__startswith=p)
                    cond_arr0 |= Q(_img_arr0__startswith=p)

            elif vendor == "mysql":
                qs = qs.annotate(
                    _img_main=mysql_json_extract_unquote(F(field_name), "$.main"),
                    _img_arr0=mysql_json_extract_unquote(F(field_name), "$[0]"),
                )
                for p in allowed:
                    cond_main |= Q(_img_main__startswith=p)
                    cond_arr0 |= Q(_img_arr0__startswith=p)

            # nasze = cokolwiek spełnia którykolwiek warunek
            ours_cond = cond_scalar | cond_main | cond_arr0

            to_clean_qs = qs.exclude(ours_cond)

            total = qs.count()
            bad = to_clean_qs.count()
            self.stdout.write(f"[FAST] Łącznie images != NULL: {total} | do wyczyszczenia (nie nasze): {bad}")

            if dry or bad == 0:
                self.stdout.write(self.style.SUCCESS("Dry-run lub brak do czyszczenia."))
                return

            updated = to_clean_qs.update(**{field_name: None})
            self.stdout.write(self.style.SUCCESS(f"[FAST] Wyczyszczono rekordów: {updated} (images=NULL)"))
            return

        # --------------------------- DEEP (Python) ----------------------------
        def is_ours(url: str) -> bool:
            if not url or not isinstance(url, str):
                return False
            u = url.strip()
            return any(u.startswith(p) for p in allowed)

        def normalize_and_filter(value):
            """
            Zwraca:
             - string naszego URL-a,
             - listę tylko naszych URL-i,
             - None jeśli nic nie pasuje / nie ma URL-i.
            Obsługuje: string, listy stringów/dictów, dicty z kluczem 'main'/'url'/'src'/'href' itd.
            """
            if value is None:
                return None

            # string
            if isinstance(value, str):
                return value if is_ours(value) else None

            # dict
            if isinstance(value, dict):
                for k in ("main", "url", "src", "href"):
                    v = value.get(k)
                    if isinstance(v, str) and is_ours(v):
                        return v
                    if isinstance(v, (list, tuple)):
                        kept = [x for x in v if isinstance(x, str) and is_ours(x)]
                        if kept:
                            return kept
                kept = []
                for v in value.values():
                    if isinstance(v, str) and is_ours(v):
                        kept.append(v)
                    elif isinstance(v, (list, tuple)):
                        kept.extend([x for x in v if isinstance(x, str) and is_ours(x)])
                return kept or None

            # lista
            if isinstance(value, (list, tuple)):
                kept = []
                for item in value:
                    if isinstance(item, str) and is_ours(item):
                        kept.append(item)
                    elif isinstance(item, dict):
                        for k in ("main", "url", "src", "href"):
                            v = item.get(k)
                            if isinstance(v, str) and is_ours(v):
                                kept.append(v)
                # dedupe
                kept = list(dict.fromkeys(kept))
                return kept or None

            return None

        base_qs = Model.objects.filter(**{f"{field_name}__isnull": False}).order_by("pk")
        total = base_qs.count()
        if total == 0:
            self.stdout.write("Brak rekordów do sprawdzenia.")
            return

        self.stdout.write(f"[DEEP] Kandydatów (images != NULL): {total}")
        cleaned, trimmed, processed = 0, 0, 0

        iterator = base_qs.values("pk", field_name).iterator(chunk_size=batch_size)
        while True:
            batch = list(itertools.islice(iterator, batch_size))
            if not batch:
                break

            to_null = []
            to_set = []
            for row in batch:
                pk = row["pk"]
                val = row[field_name]
                new_val = normalize_and_filter(val)

                if new_val is None:
                    to_null.append(pk)
                else:
                    if new_val != val:
                        to_set.append((pk, new_val))

            processed += len(batch)

            if not dry:
                if to_null:
                    Model.objects.filter(pk__in=to_null).update(**{field_name: ([] if set_empty else None)})
                for pk, nv in to_set:
                    Model.objects.filter(pk=pk).update(**{field_name: nv})

            cleaned += len(to_null)
            trimmed += len(to_set)
            self.stdout.write(f"[DEEP] processed={processed}/{total} | set NULL/[]: {len(to_null)} | trimmed/updated: {len(to_set)}")

        if dry:
            self.stdout.write(self.style.SUCCESS(
                f"[DEEP][DRY] Do wyczyszczenia (NULL/[]): {cleaned} | Do przycięcia (pozostawiono tylko nasze): {trimmed}"
            ))
        else:
            self.stdout.write(self.style.SUCCESS(
                f"[DEEP] Wyczyszczono (NULL/[]): {cleaned} | Przycięto (pozostawiono tylko nasze): {trimmed}"
            ))
