import re

from django.core.management.base import BaseCommand
from asgiref.sync import sync_to_async, async_to_sync
from django.db.models import Q

from extractly.models import NetworkMonitoredPage
from html_agregator.utils.raw_data import process_single_page

class Command(BaseCommand):
    help = "Generuje raw_data i image_links z pola html lub sliced_html, z użyciem selektorów i configu."

    def add_arguments(self, parser):
        parser.add_argument('--id', type=int, help="ID of the record to process")
        parser.add_argument('--name', nargs='*', help="Filter by source name (case-insensitive, multiple values allowed)")
        parser.add_argument('--config', choices=['true', 'false', 'all'], default='all', help="Filter by whether an htmlnetworksource config exists")
        parser.add_argument('--headless', action='store_true', default=False, help="Run without a UI (headless)")
        parser.add_argument('--include-fetched', action='store_true', default=False, help="Also include records already processed (is_raw_data=True)")
        parser.add_argument('--raw-data-empty', choices=['true', 'false', 'all'], default='true', help="Filter by empty raw_data")
        parser.add_argument('--sliced-html-empty', choices=['true', 'false', 'all'], default='true', help="Filter by empty sliced_html")
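
    # Example invocations (assuming this file is saved as
    # management/commands/generate_raw_data.py; the command name follows the
    # file name, so adjust if yours differs):
    #   python manage.py generate_raw_data --id 42
    #   python manage.py generate_raw_data --name onet wp --config true
    #   python manage.py generate_raw_data --raw-data-empty all --include-fetched --headless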

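    # BaseCommand.handle() must be synchronous; we hop into the async worker
    # with async_to_sync, and individual ORM calls inside run() hop back via
    # sync_to_async.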
    def handle(self, *args, **options):
        async_to_sync(self.run)(options)

    async def run(self, options):
        record_id = options.get("id")
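        # small batch: only batch_size page objects are materialized at a time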
        batch_size = 2
        total_processed = 0

        # --id short-circuits all other filters and processes a single record
        if record_id:
            pages = await sync_to_async(list)(
                NetworkMonitoredPage.objects.filter(id=record_id, html__isnull=False).exclude(html='')
            )
            if not pages:
                self.stdout.write(f"✅ No record with id={record_id} and non-empty html.")
                return
            self.stdout.write("🔄 Processing 1 record...")
            for obj in pages:
                await process_single_page(obj, 1, 1, self.stdout.write)
            return

        # Build the queryset incrementally from the CLI filters
        qs = NetworkMonitoredPage.objects.filter(html__isnull=False).exclude(html='')

        # raw_data counts as empty when it is null, '', [], {}, '[]', '{}',
        # or the placeholder {"sliced_gallery_html": "", "sliced_container_html": ""}
        RAW_DATA_EMPTY_DICT = {"sliced_gallery_html": "", "sliced_container_html": ""}
        raw_data_empty_q = (
            Q(raw_data__isnull=True) |
            Q(raw_data='') |
            Q(raw_data=[]) |
            Q(raw_data={}) |
            Q(raw_data='[]') |
            Q(raw_data='{}') |
            Q(raw_data=RAW_DATA_EMPTY_DICT)
        )
        if options['raw_data_empty'] == 'true':
            qs = qs.filter(raw_data_empty_q).exclude(is_active=False)
        elif options['raw_data_empty'] == 'false':
            qs = qs.exclude(raw_data_empty_q).exclude(is_active=False)

        # sliced_html_empty (apply the --sliced-html-empty filter)
        if options['sliced_html_empty'] == 'true':
            qs = qs.filter(Q(sliced_html__isnull=True) | Q(sliced_html=''))
        elif options['sliced_html_empty'] == 'false':
            qs = qs.exclude(Q(sliced_html__isnull=True) | Q(sliced_html=''))

        # is_raw_data: skip already-processed records unless --include-fetched
        if not options['include_fetched']:
            qs = qs.filter(is_raw_data=False)

        # name (source name); escape each value so regex metacharacters match literally
        if options['name']:
            qs = qs.filter(source__name__iregex="|".join(re.escape(n) for n in options['name']))

        # config (does a related htmlnetworksource config exist?)
        if options['config'] == 'true':
            qs = qs.filter(source__html_config__isnull=False)
        elif options['config'] == 'false':
            qs = qs.filter(source__html_config__isnull=True)

        total_records = await sync_to_async(qs.count)()
        self.stdout.write(f"🔄 {total_records} record(s) to process.")

        if total_records == 0:
            self.stdout.write("✅ No records to process.")
            return

        batch_total = (total_records + batch_size - 1) // batch_size

        # Snapshot matching IDs up front: processing a page may change fields
        # the filter depends on (e.g. is_raw_data), and offset-slicing a live
        # queryset would then silently skip records.
        all_ids = await sync_to_async(list)(
            qs.order_by("-id").values_list("id", flat=True)
        )

        for batch_index in range(batch_total):
            offset = batch_index * batch_size
            batch_ids = all_ids[offset:offset + batch_size]
            pages = await sync_to_async(list)(
                NetworkMonitoredPage.objects.filter(id__in=batch_ids).order_by("-id")
            )

            self.stdout.write(f"\n📦 Batch {batch_index + 1}/{batch_total} (size: {len(pages)})")

            for obj in pages:
                total_processed += 1
                await process_single_page(obj, total_processed, total_records, self.stdout.write)
