# -*- coding: utf-8 -*-
# your_app/management/commands/images_health.py
import csv
import itertools
import logging
from typing import List

from django.apps import apps
from django.core.management.base import BaseCommand
from django.db.models import CharField, F, Q
from django.db.models.functions import Cast

try:
    import httpx
except Exception:  # pragma: no cover
    httpx = None  # HEAD/GET checks are unavailable without httpx

logger = logging.getLogger(__name__)

DEFAULT_ALLOWED_PREFIXES = [
    # your OVH bucket (example from the conversation)
    "https://extractly.s3.waw.io.cloud.ovh.net/",
    # add your own aliases/CDNs here if any exist:
    # "https://cdn.extractly.cloud/",
    # "https://images.hously.cloud/",
]

ALLOWED_MIME = {"image/jpeg", "image/png", "image/webp", "image/gif"}
CONTENT_TYPE_FIX = {"image/jpg": "image/jpeg"}

HTTP_TIMEOUT = 12.0  # overall request timeout
HTTP_CONNECT = 4.0
HTTP_READ = 8.0


class Command(BaseCommand):
    help = (
        "Health-check pola 'images' (i opcjonalnie 'original_image_urls').\n"
        "Wypisuje statystyki i wykryte problemy; opcjonalnie zapisuje CSV.\n"
        "Nie modyfikuje danych."
    )
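
    # Example invocations (illustrative only – the limit and the CSV path below are
    # placeholders, not required values; the model label and prefix are the defaults
    # defined above):
    #
    #   # offline statistics only, against the default model
    #   python manage.py images_health
    #
    #   # online HEAD checks for up to 500 matching rows, with a CSV report of the issues found
    #   python manage.py images_health \
    #       --model extractly.AdsManual \
    #       --allow-prefix https://extractly.s3.waw.io.cloud.ovh.net/ \
    #       --check-online --limit 500 \
    #       --report-csv /tmp/images_health.csv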

    def add_arguments(self, parser):
        parser.add_argument(
            "--model",
            default="extractly.AdsManual",
            help="Model w postaci app_label.ModelName (domyślnie: extractly.AdsManual)",
        )
        parser.add_argument(
            "--images-field",
            default="images",
            help="Nazwa pola z obrazkami (domyślnie: images)",
        )
        parser.add_argument(
            "--originals-field",
            default="original_image_urls",
            help="Nazwa pola z oryginalnymi URL-ami (domyślnie: original_image_urls)",
        )
        parser.add_argument(
            "--allow-prefix",
            action="append",
            dest="allowed_prefixes",
            help="Dodatkowy dozwolony prefiks URL (można podać wiele razy). Jeśli brak – użyje domyślnych.",
        )
        parser.add_argument(
            "--batch-size",
            type=int,
            default=2000,
            help="Wielkość batcha iteratora dla odczytu z DB (domyślnie 2000).",
        )
        parser.add_argument(
            "--limit",
            type=int,
            default=0,
            help="Maksymalna liczba rekordów do sprawdzenia w trybie online (0 = bez limitu).",
        )
        parser.add_argument(
            "--check-online",
            action="store_true",
            help="Wykonaj sprawdzenie HTTP (HEAD->GET fallback) dla URL w images.",
        )
        parser.add_argument(
            "--method",
            choices=["HEAD", "GET"],
            default="HEAD",
            help="Metoda HTTP do sprawdzania URL (domyślnie HEAD; GET jeśli serwer nie obsługuje HEAD).",
        )
        parser.add_argument(
            "--report-csv",
            default="",
            help="Ścieżka do pliku CSV z listą problemów (opcjonalne).",
        )

    def handle(self, *args, **opts):
        model_label: str = opts["model"]
        images_field: str = opts["images_field"]
        originals_field: str = opts["originals_field"]
        batch_size: int = opts["batch_size"]
        limit: int = int(opts["limit"] or 0)
        check_online: bool = bool(opts["check_online"])
        method: str = opts["method"]
        report_csv: str = opts["report_csv"]
        allowed: List[str] = opts.get("allowed_prefixes") or DEFAULT_ALLOWED_PREFIXES
        allowed = [p if p.endswith("/") else p + "/" for p in allowed]

        try:
            Model = apps.get_model(model_label)
        except LookupError:
            self.stderr.write(self.style.ERROR(f"Model '{model_label}' nie istnieje."))
            return

        # Required fields must exist
        field_names = {f.name for f in Model._meta.get_fields()}
        if images_field not in field_names:
            self.stderr.write(self.style.ERROR(f"Pole '{images_field}' nie istnieje w {model_label}."))
            return
        if originals_field not in field_names:
            self.stdout.write(self.style.WARNING(
                f"Warning: field '{originals_field}' does not exist – skipping the comparison with originals."
            ))

        # Collect statistics (fast, computed in the DB)
        # 1) how many rows have images != NULL
        non_null_qs = Model.objects.filter(**{f"{images_field}__isnull": False})
        total_non_null = non_null_qs.count()
        total_all = Model.objects.count()
        self.stdout.write(f"Rekordów ogółem: {total_all}")
        self.stdout.write(f"Rekordów z {images_field} != NULL: {total_non_null}")

        # 2) images as text (string) – annotate with Cast(JSON -> text)
        annotated = non_null_qs.annotate(_img_text=Cast(F(images_field), output_field=CharField()))

        # 3) prefix check (strings only; lists/dicts end up with _img_text in their JSON form).
        #    Note: on PostgreSQL a JSON string cast to text keeps its surrounding double quotes,
        #    so prefixes preceded by a quote are matched as well.
        cond = Q()
        for p in allowed:
            cond |= Q(_img_text__startswith=p) | Q(_img_text__startswith=f'"{p}')

        # Candidates that are not ours (may be a non-string, a string from another host, or a JSON list/dict)
        not_ours_qs = annotated.exclude(cond)
        not_ours_count = not_ours_qs.count()
        self.stdout.write(f"Rekordów potencjalnie nie-naszych (prefix mismatch lub non-string): {not_ours_count}")

        # 4) images == original_image_urls (if the field exists)
        eq_count = 0
        if originals_field in field_names:
            eq_count = Model.objects.filter(
                **{f"{images_field}__isnull": False}
            ).filter(
                **{f"{images_field}": F(originals_field)}
            ).count()
            self.stdout.write(f"Rekordów z {images_field} == {originals_field}: {eq_count}")

        # Prepare the CSV report (optional)
        csv_writer = None
        csv_file = None
        if report_csv:
            csv_file = open(report_csv, "w", newline="", encoding="utf-8")
            csv_writer = csv.writer(csv_file)
            csv_writer.writerow([
                "pk",
                "issue",
                "images_preview",
                "http_status",
                "content_type",
                "content_length",
            ])

        # 5) Online checks (HEAD/GET)
        online_checked = 0
        online_bad = 0
        online_ct_bad = 0
        online_len_zero = 0

        # use only records that are almost certainly a string URL pointing at our host
        ours_only_qs = annotated.filter(cond).order_by("pk")

        if check_online:
            if httpx is None:
                self.stderr.write(self.style.ERROR("Brak biblioteki httpx – zainstaluj `httpx`."))
            else:
                timeout = httpx.Timeout(HTTP_TIMEOUT, connect=HTTP_CONNECT, read=HTTP_READ)
                headers = {
                    "User-Agent": "ImagesHealth/1.0 (+healthcheck)",
                    "Accept": "image/avif,image/webp,image/apng,image/svg+xml,image/*;q=0.8,*/*;q=0.5",
                }
                client = httpx.Client(timeout=timeout, headers=headers, follow_redirects=True)

                try:
                    iterator = ours_only_qs.values("pk", "_img_text").iterator(chunk_size=batch_size)
                    while True:
                        batch = list(itertools.islice(iterator, batch_size))
                        if not batch:
                            break
                        for row in batch:
                            if limit and online_checked >= limit:
                                break

                            pk = row["pk"]
                            url = (row["_img_text"] or "").strip()
                            if not url:
                                continue

                            method_to_use = method
                            resp = None
                            try:
                                if method_to_use == "HEAD":
                                    resp = client.head(url)
                                    if resp.status_code >= 400 or not resp.headers.get("content-type"):
                                        # some servers do not support HEAD – fall back to a streamed GET
                                        # (httpx has no stream kwarg on .get(); build the request and send it with stream=True)
                                        resp = client.send(client.build_request("GET", url), stream=True)
                                        method_to_use = "GET"
                                else:
                                    resp = client.send(client.build_request("GET", url), stream=True)

                                online_checked += 1
                                status = resp.status_code
                                ct = (resp.headers.get("content-type", "") or "").split(";")[0].strip().lower()
                                ct = CONTENT_TYPE_FIX.get(ct, ct)
                                clen = int(resp.headers.get("content-length") or 0)

                                # health rules:
                                # - status < 400
                                # - content-type in ALLOWED_MIME (if not GET it may be undetermined – accepted, but noted)
                                # - content-length > 0 (when provided)
                                issue = None
                                if status >= 400:
                                    issue = f"http_error_{status}"
                                    online_bad += 1
                                elif ct and ct not in ALLOWED_MIME:
                                    issue = f"bad_content_type:{ct}"
                                    online_ct_bad += 1
                                elif clen == 0 and method_to_use == "GET":
                                    # with HEAD the content-length is often 0 – we do not treat that as an error
                                    issue = "zero_length"
                                    online_len_zero += 1

                                if csv_writer and issue:
                                    csv_writer.writerow([
                                        pk,
                                        issue,
                                        url[:200],
                                        status,
                                        ct,
                                        clen,
                                    ])

                            except Exception as e:
                                online_checked += 1
                                online_bad += 1
                                if csv_writer:
                                    csv_writer.writerow([
                                        pk,
                                        f"exception:{type(e).__name__}",
                                        url[:200],
                                        "",
                                        "",
                                        "",
                                    ])
                            finally:
                                if resp is not None and method_to_use == "GET":
                                    try:
                                        resp.close()
                                    except Exception:
                                        pass
                        if limit and online_checked >= limit:
                            break
                finally:
                    try:
                        client.close()
                    except Exception:
                        pass

        # Final report
        self.stdout.write("\n=== SUMMARY ===")
        self.stdout.write(f"Total records: {total_all}")
        self.stdout.write(f"{images_field} != NULL: {total_non_null}")
        self.stdout.write(f"Prefix mismatch / non-string: {not_ours_count}")
        if originals_field in field_names:
            self.stdout.write(f"{images_field} == {originals_field}: {eq_count}")

        if check_online:
            self.stdout.write(f"\nSprawdzeń online (prefix OK): {online_checked}")
            self.stdout.write(f"HTTP błędy (>=400/exception): {online_bad}")
            self.stdout.write(f"Zły Content-Type: {online_ct_bad}")
            self.stdout.write(f"Zero Content-Length (GET): {online_len_zero}")

        if csv_writer:
            csv_file.close()
            self.stdout.write(self.style.SUCCESS(f"Raport zapisany do: {report_csv}"))
