"""
Quick test script to verify Morizon HTML collection is working.
This script tests one page to verify the configuration is correct.
"""
import os
import sys
import django
import asyncio
from playwright.async_api import async_playwright

# Setup Django
# Put this script's own directory first on sys.path so the settings module
# named below can be imported (assumes the script sits next to the
# NetworkMonitoring package — TODO confirm).
sys.path.insert(0, os.path.dirname(os.path.abspath(__file__)))
# setdefault keeps any DJANGO_SETTINGS_MODULE already present in the environment.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'NetworkMonitoring.settings')
# Must run before the model imports that follow: Django needs its app
# registry initialised before model classes can be imported.
django.setup()

from extractly.models import SourceHtml, NetworkMonitoredPage
from html_agregator.html_fetcher import fetch_and_save_html_for_pages
from django.utils import timezone

def _content_size(value):
    """Return ``len(value)``, or 0 when *value* is empty or ``None``."""
    return len(value) if value else 0


async def test_one_page(
    source_id: str = '779dc8fa-ef0b-4627-8cf6-755f3375d949',
    page_id: int = 9793667,
) -> bool:
    """Collect HTML for one monitored page and report whether it was saved.

    Steps, each reported on stdout:

    1. Load the ``SourceHtml`` row identified by *source_id* and print a
       summary of its actions and selectors.
    2. Load the ``NetworkMonitoredPage`` identified by *page_id* and print
       the sizes of its currently stored ``html`` / ``sliced_html``.
    3. Launch headless Chromium through Playwright and hand the page to
       ``fetch_and_save_html_for_pages``.
    4. Re-read the page from the database and print the new sizes.

    All ORM calls go through ``asyncio.to_thread`` so the synchronous
    database access runs off the event-loop thread.

    Args:
        source_id: Value matched against ``SourceHtml.source_id``.
        page_id: Primary key of the ``NetworkMonitoredPage`` to fetch.

    Returns:
        ``True`` when the stored ``html`` is longer than 1000 after the
        fetch, ``False`` otherwise.

    Raises:
        Whatever ``objects.get`` raises when a lookup does not match exactly
        one row (for Django models: ``DoesNotExist`` /
        ``MultipleObjectsReturned``), plus any error propagated by
        Playwright or ``fetch_and_save_html_for_pages``.
    """
    separator = "=" * 60
    print(separator)
    print("Testing Morizon HTML Collection")
    print(separator)

    # Get configuration
    html_cfg = await asyncio.to_thread(SourceHtml.objects.get, source_id=source_id)
    # Guard against a missing/empty selectors mapping or a missing
    # 'sliced_html' entry, the same way the actions count is guarded, so the
    # summary prints instead of aborting the whole check.
    selectors = html_cfg.selectors or {}
    sliced_cfg = selectors.get('sliced_html') or {}
    print("\n✓ Configuration loaded:")
    print(f"  - Portal: {html_cfg.name}")
    print(f"  - Actions: {_content_size(html_cfg.actions)}")
    print(f"  - Selectors keys: {list(selectors.keys())}")
    print(f"  - Content field: {sliced_cfg.get('content_field')}")
    print(f"  - Content type ID: {sliced_cfg.get('content_type')}")

    # Get test page
    test_page = await asyncio.to_thread(NetworkMonitoredPage.objects.get, id=page_id)
    print("\n✓ Test page loaded:")
    print(f"  - URL: {test_page.url}")
    print(f"  - Current HTML: {_content_size(test_page.html)} bytes")
    print(f"  - Current sliced: {_content_size(test_page.sliced_html)} bytes")

    # Fetch HTML
    print("\n→ Starting HTML collection...")
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            browser_page = await browser.new_page()
            pages = await asyncio.to_thread(
                lambda: list(NetworkMonitoredPage.objects.filter(id=page_id))
            )
            await fetch_and_save_html_for_pages(browser_page, pages=pages)
            print("\n✓ HTML collection completed!")
        finally:
            # Always release the browser, even when the fetch fails.
            await browser.close()

    # Check result: re-read the row so we report what was actually persisted,
    # not the in-memory instance loaded before the fetch.
    test_page = await asyncio.to_thread(NetworkMonitoredPage.objects.get, id=page_id)
    print("\n✓ Result:")
    print(f"  - HTML: {_content_size(test_page.html):,} bytes")
    print(f"  - Sliced HTML: {_content_size(test_page.sliced_html):,} bytes")
    print(f"  - Is fetched: {test_page.is_fetched}")
    print(f"  - Date fetched: {test_page.date_fetched}")

    # Anything at or below 1000 is treated as "nothing useful was saved".
    success = _content_size(test_page.html) > 1000
    if success:
        print("\n✅ SUCCESS! HTML is being saved correctly!")
    else:
        print("\n❌ FAILED! HTML is still not being saved.")

    print(separator)
    return success

if __name__ == "__main__":
    # Script entry point: run the async check to completion on a new event loop.
    entry_coroutine = test_one_page()
    asyncio.run(entry_coroutine)
