# -*- coding: utf-8 -*-
# your_app/management/commands/export_images_map.py

import csv
import json
import itertools
from typing import List
from django.db.models.functions import Cast, Coalesce
from django.db.models import CharField, Value
from django.apps import apps
from django.core.management.base import BaseCommand
from django.db.models import F, Q
from django.db.models.functions import Cast
from django.db.models import CharField
try:
    from django.contrib.postgres.fields.jsonb import KeyTextTransform
except Exception:
    KeyTextTransform = None

# URL prefixes that are treated as "our" image links when no filter override
# is given. Add other CDNs/aliases here if you use them, for example:
#   "https://cdn.extractly.cloud/"
#   "https://images.hously.cloud/"
DEFAULT_ALLOWED_PREFIXES = ["https://extractly.s3.waw.io.cloud.ovh.net/"]

class Command(BaseCommand):
    """Export a ``url -> images`` mapping of a model to a CSV or JSONL file.

    Rows are read from the model given by ``--model``. Only rows with a
    non-null images field and a non-null, non-empty URL field are considered.
    Unless ``--all`` is passed, a row is exported only when the text form of
    its images value starts with one of the allowed prefixes.

    The output format is chosen from ``--outfile``: a name ending in
    ``.jsonl`` (case-insensitive) produces JSON Lines, anything else CSV.
    """

    help = (
        "Eksportuje mapę url->images do pliku (CSV lub JSONL).\n"
        "Domyślnie eksportuje tylko nasze linki (prefiks)."
    )

    def add_arguments(self, parser):
        """Register the command-line options on ``parser``."""
        parser.add_argument("--model", default="extractly.AdsManual",
                            help="app_label.ModelName (domyślnie: extractly.AdsManual)")
        parser.add_argument("--images", default="images",
                            help="Nazwa pola z obrazkiem (domyślnie: images)")
        parser.add_argument("--url", default="url",
                            help="Nazwa pola URL (domyślnie: url)")
        parser.add_argument("--outfile", required=True,
                            help="Ścieżka wyjściowa: *.csv lub *.jsonl")
        parser.add_argument("--allow-prefix", action="append", dest="allowed_prefixes",
                            help="Dodatkowy dozwolony prefiks URL (można podać wiele razy).")
        parser.add_argument("--all", action="store_true",
                            help="Eksportuj wszystkie images (również nie nasze).")
        parser.add_argument("--batch-size", type=int, default=5000,
                            help="Rozmiar batcha odczytu (domyślnie 5000).")

    def handle(self, *args, **opts):
        """Run the export described by the parsed options in ``opts``.

        Writes the number of matching rows and a final success message to
        ``self.stdout`` and creates/overwrites the file at ``opts["outfile"]``.
        """
        images_field = opts["images"]
        url_field = opts["url"]
        outfile = opts["outfile"]
        batch_size = opts["batch_size"]
        export_all = bool(opts["all"])
        allowed = self._resolve_prefixes(opts.get("allowed_prefixes"))

        Model = apps.get_model(opts["model"])
        qs = self._build_queryset(Model, images_field, url_field, allowed, export_all)

        # This count is taken before rows with a blank images text are
        # skipped, so the file may contain fewer rows than reported here.
        total = qs.count()
        self.stdout.write(f"Do eksportu rekordów: {total}")

        # Lazy generator: no query is executed until the file is open and
        # the rows are actually consumed below.
        rows = self._iter_rows(qs, url_field, batch_size)

        if outfile.lower().endswith(".jsonl"):
            with open(outfile, "w", encoding="utf-8") as f:
                for pk, url, img in rows:
                    f.write(json.dumps({"pk": pk, "url": url, "images": img}, ensure_ascii=False) + "\n")
            self.stdout.write(self.style.SUCCESS(f"Zapisano JSONL: {outfile}"))
        else:
            # CSV
            with open(outfile, "w", newline="", encoding="utf-8") as f:
                w = csv.writer(f)
                w.writerow(["pk", "url", "images"])
                w.writerows(rows)
            self.stdout.write(self.style.SUCCESS(f"Zapisano CSV: {outfile}"))

    @staticmethod
    def _resolve_prefixes(extra=None) -> List[str]:
        """Return the default prefixes extended by ``extra``.

        ``--allow-prefix`` is documented as an *additional* prefix, so the
        values passed on the command line are appended to
        ``DEFAULT_ALLOWED_PREFIXES`` instead of replacing them. Every prefix
        is forced to end with ``/`` and duplicates are dropped while keeping
        the first occurrence's position.
        """
        combined = [*DEFAULT_ALLOWED_PREFIXES, *(extra or [])]
        normalized = [p if p.endswith("/") else p + "/" for p in combined]
        return list(dict.fromkeys(normalized))

    @staticmethod
    def _build_queryset(Model, images_field, url_field, allowed, export_all):
        """Build the queryset of exportable rows, annotated with ``_img_text``.

        ``_img_text`` is the text used both for prefix filtering and as the
        exported ``images`` value.
        """
        qs = (Model.objects
              .filter(**{f"{images_field}__isnull": False})
              .exclude(**{f"{url_field}__isnull": True})
              .exclude(**{f"{url_field}": ""})
              .order_by("pk"))

        if KeyTextTransform is not None:
            # Prefer images['main'] extracted as text; fall back to the whole
            # field cast to text, and finally to an empty string.
            qs = qs.annotate(
                _img_main=KeyTextTransform('main', images_field),
                _img_text_raw=Cast(F(images_field), output_field=CharField())
            ).annotate(
                _img_text=Coalesce(F('_img_main'), F('_img_text_raw'), Value(''))
            )
        else:
            qs = qs.annotate(_img_text=Cast(F(images_field), output_field=CharField()))

        # Without --all, keep only rows whose text starts with an allowed prefix.
        if not export_all:
            cond = Q()
            for prefix in allowed:
                cond |= Q(_img_text__startswith=prefix)
            qs = qs.filter(cond)

        return qs

    @staticmethod
    def _iter_rows(qs, url_field, batch_size):
        """Yield ``(pk, url, images_text)`` tuples for every row to be written.

        Rows are streamed with ``QuerySet.iterator(chunk_size=batch_size)``.
        The images text is stripped of surrounding whitespace, and rows whose
        text is empty after stripping are skipped.
        """
        records = qs.values("pk", url_field, "_img_text").iterator(chunk_size=batch_size)
        for row in records:
            img = (row["_img_text"] or "").strip()
            if img:
                yield row["pk"], row[url_field], img

