from django.core.management.base import BaseCommand
from django.db.models import Count, Min
from extractly.models import RawMonitoredLink

class Command(BaseCommand):
    """Management command that removes duplicate ``RawMonitoredLink`` rows.

    Rows are grouped by ``(url, source)``. In every group holding more than
    one row, the row with the lowest ``id`` is kept and the others are
    deleted.
    """

    help = "Usuwa duplikaty z RawMonitoredLink, zostawiając najstarszy (po id) dla każdego (url, source)."

    def handle(self, *args, **options):
        """Delete surplus rows of every duplicated ``(url, source)`` group.

        Writes a start message and a final summary to ``self.stdout``. The
        summary counts only ``RawMonitoredLink`` rows; objects of other
        models removed through cascading deletes are not included.

        Args:
            *args: Positional arguments passed by Django; unused.
            **options: Parsed command-line options; unused.
        """
        self.stdout.write("Szukam duplikatów RawMonitoredLink...")

        duplicate_groups = (
            RawMonitoredLink.objects
            .values('url', 'source')
            .annotate(count=Count('id'), min_id=Min('id'))
            .filter(count__gt=1)
            # Clear any ordering so that only (url, source) ends up in the
            # GROUP BY clause. A default Meta.ordering on the model (not
            # visible from this file) could otherwise split the groups on
            # older Django versions and hide duplicates.
            .order_by()
        )

        # Key under which QuerySet.delete() reports this model's row count.
        model_label = RawMonitoredLink._meta.label

        removed_count = 0
        for group in duplicate_groups:
            extra_rows = RawMonitoredLink.objects.filter(
                url=group['url'],
                source=group['source'],
            ).exclude(id=group['min_id'])
            # delete() returns (total, per_model). The total also counts
            # cascade-deleted related objects, so read this model's own
            # entry to report the real number of duplicates removed.
            _, per_model = extra_rows.delete()
            removed_count += per_model.get(model_label, 0)

        self.stdout.write(self.style.SUCCESS(f"Usunięto {removed_count} duplikatów z RawMonitoredLink."))
