import os
import sys
import django

# Bootstrap Django so the ORM can be used from this standalone script.
# The directory containing this script goes first on the import path.
_script_dir = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, _script_dir)

# NOTE(review): machine-specific absolute path — presumably the project
# checkout; confirm/adjust before running on another machine.
_project_dir = r'C:\Users\jerin\Desktop\Poland_AI\Working\Server_Branch_data\extractly'
os.chdir(_project_dir)

# setdefault leaves an already-set DJANGO_SETTINGS_MODULE untouched.
os.environ.setdefault('DJANGO_SETTINGS_MODULE', 'houslyspace.settings')
django.setup()

from extractly.models import SourceHtml

# Load the Morizon source configuration by its fixed source_id.
sh = SourceHtml.objects.get(source_id='779dc8fa-ef0b-4627-8cf6-755f3375d949')

print("=== BEFORE CLEANUP ===")
print(f"Actions length: {len(sh.actions) if sh.actions else 0}")
print(f"Selectors keys: {list(sh.selectors.keys()) if sh.selectors else []}")

# Strip top-level entries whose key starts with "_" (e.g. _comment) from
# every action. Only the first level of each action is filtered here.
if sh.actions:
    stripped_actions = []
    for action in sh.actions:
        kept = {}
        for key, value in action.items():
            if key.startswith('_'):
                continue
            kept[key] = value
        stripped_actions.append(kept)
    sh.actions = stripped_actions

# Clean selectors - remove documentation fields recursively
def clean_dict(obj: object) -> object:
    """Return a copy of *obj* with "_"-prefixed dict keys removed recursively.

    Dicts are rebuilt without any entry whose key is a string starting with
    an underscore; lists are rebuilt element by element; every other value
    is returned unchanged. The input is never mutated.

    Args:
        obj: A dict, list, or scalar (typically decoded JSON).

    Returns:
        The cleaned structure, of the same container type as *obj*.
    """
    if isinstance(obj, dict):
        cleaned = {}
        for key, value in obj.items():
            # Only string keys can carry the "_" documentation prefix;
            # non-string keys are kept instead of crashing on .startswith.
            if isinstance(key, str) and key.startswith('_'):
                continue
            cleaned[key] = clean_dict(value)
        return cleaned
    if isinstance(obj, list):
        return [clean_dict(item) for item in obj]
    return obj

if sh.selectors:
    # Strip every "_"-prefixed documentation key at any nesting depth.
    cleaned_selectors = clean_dict(sh.selectors)

    # Keep only the 'sliced_html' entry; every other top-level selector
    # key is dropped (the value is None if 'sliced_html' was absent).
    sh.selectors = {
        'sliced_html': cleaned_selectors.get('sliced_html')
    }

# Clean inactive - remove "_"-prefixed (e.g. _comment) fields recursively
if sh.inactive:
    sh.inactive = clean_dict(sh.inactive)

# Save changes
sh.save()

print("\n=== AFTER CLEANUP ===")
print(f"Actions length: {len(sh.actions) if sh.actions else 0}")
print(f"Selectors keys: {list(sh.selectors.keys()) if sh.selectors else []}")

# sh.selectors can be None/empty, and 'sliced_html' can be missing or None;
# fall back to an empty dict so this summary cannot crash after the save.
selectors_after = sh.selectors if isinstance(sh.selectors, dict) else {}
sliced_html = selectors_after.get('sliced_html')
if not isinstance(sliced_html, dict):
    sliced_html = {}
print(f"Sliced_html has these extraction keys: {[k for k in sliced_html if k.startswith('sliced_')]}")

print("\n✅ Configuration cleaned successfully!")
print("\nNow test with:")
print("  python manage.py run_html --name morizon --limit 3 --include-fetched --headless")
