from extractly.models import NetworkMonitoredPage, SourceNetwork
from asgiref.sync import sync_to_async
from django.db.models import Q

async def get_pages_to_process(
    enable="true", name=None, source_ids=None, order="id", include_fetched=False
):
    """Build a queryset of monitored pages belonging to the selected networks.

    Source networks are selected first; their ids are loaded eagerly (in a
    worker thread via ``sync_to_async``), and the pages queryset is then
    built from those ids. The returned pages queryset itself is not evaluated
    here.

    Args:
        enable: ``"all"`` skips the ``enabled`` filter; ``"true"`` keeps only
            networks with ``enabled=True``; any other value keeps only
            networks with ``enabled=False``.
        name: A single name fragment or an iterable of fragments. A network
            matches when its ``name`` contains any fragment
            (case-insensitive). Falsy values disable the filter.
        source_ids: Optional iterable of ``SourceNetwork`` ids to restrict to.
        order: Field name passed to ``order_by`` on the pages queryset.
        include_fetched: When false, only pages whose ``html`` or
            ``sliced_html`` is NULL or holds one of the placeholder values
            are kept, and pages with ``is_active=False`` are excluded. When
            true, neither of those two restrictions is applied.

    Returns:
        A ``NetworkMonitoredPage`` queryset (empty via ``.none()`` when no
        source network matches).
    """
    network_filter = Q()
    if enable != "all":
        network_filter &= Q(enabled=(enable == "true"))
    if name:
        # A bare string would otherwise be iterated character by character,
        # matching every network that contains any single letter of it.
        fragments = [name] if isinstance(name, str) else name
        name_filter = Q()
        for fragment in fragments:
            name_filter |= Q(name__icontains=fragment)
        network_filter &= name_filter
    if source_ids:
        network_filter &= Q(id__in=source_ids)

    networks = SourceNetwork.objects.filter(network_filter)
    # Evaluating a queryset is blocking ORM work, so it is pushed off the
    # event loop.
    network_ids = await sync_to_async(list)(networks.values_list("id", flat=True))

    if not network_ids:
        return NetworkMonitoredPage.objects.none()

    pages_qs = NetworkMonitoredPage.objects.filter(
        source_id__in=network_ids
    ).order_by(order)

    if not include_fetched:
        # Values treated as "nothing usable stored" for each column.
        sliced_placeholders = ("", "{}", "[]", " ")
        html_placeholders = ("", "error", "{}", "[]", " ")

        unprocessed = Q(sliced_html__isnull=True) | Q(html__isnull=True)
        for marker in sliced_placeholders:
            unprocessed |= Q(sliced_html__exact=marker)
        for marker in html_placeholders:
            unprocessed |= Q(html__exact=marker)

        # NOTE(review): inactive pages are only excluded on this branch;
        # with include_fetched=True they are returned as well - confirm
        # that this is intended.
        pages_qs = pages_qs.filter(unprocessed).exclude(is_active=False)
    return pages_qs


