From e6a80decb457c11f20808e078e21d32c8abdff04 Mon Sep 17 00:00:00 2001 From: David Jean Louis Date: Tue, 2 Mar 2021 13:20:09 +0100 Subject: [PATCH 1/5] Do not blindly delete duplicate schedules Because there are no unique constraints on schedule models (except for SolarSchedule), schedules tables can contain duplicates, those duplicate schedules should not be blindly deleted when encountered because they can be linked to existing tasks and would cause the tasks to be also deleted (on delete cascade). Instead we now just return the first duplicate found, just to avoid creating further duplicates. This fixes issue #322. --- django_celery_beat/models.py | 14 ++++----- t/unit/test_models.py | 58 ++++++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+), 8 deletions(-) diff --git a/django_celery_beat/models.py b/django_celery_beat/models.py index 08cd904f..474dc4c4 100644 --- a/django_celery_beat/models.py +++ b/django_celery_beat/models.py @@ -122,8 +122,9 @@ def from_schedule(cls, schedule): except cls.DoesNotExist: return cls(**spec) except MultipleObjectsReturned: - cls.objects.filter(**spec).delete() - return cls(**spec) + # unique_together constraint should not permit reaching this code, + # but just in case, we return the first schedule found + return cls.objects.filter(**spec).first() def __str__(self): return '{0} ({1}, {2})'.format( @@ -183,8 +184,7 @@ def from_schedule(cls, schedule, period=SECONDS): except cls.DoesNotExist: return cls(every=every, period=period) except MultipleObjectsReturned: - cls.objects.filter(every=every, period=period).delete() - return cls(every=every, period=period) + return cls.objects.filter(every=every, period=period).first() def __str__(self): readable_period = None @@ -236,8 +236,7 @@ def from_schedule(cls, schedule): except cls.DoesNotExist: return cls(**spec) except MultipleObjectsReturned: - cls.objects.filter(**spec).delete() - return cls(**spec) + return cls.objects.filter(**spec).first() class CrontabSchedule(models.Model): @@ -350,8 +349,7 @@ def from_schedule(cls, schedule): except cls.DoesNotExist: return cls(**spec) except MultipleObjectsReturned: - cls.objects.filter(**spec).delete() - return cls(**spec) + return cls.objects.filter(**spec).first() class PeriodicTasks(models.Model): diff --git a/t/unit/test_models.py b/t/unit/test_models.py index 337cc9e6..c41092ab 100644 --- a/t/unit/test_models.py +++ b/t/unit/test_models.py @@ -7,6 +7,7 @@ from django.db.migrations.autodetector import MigrationAutodetector from django.db.migrations.loader import MigrationLoader from django.db.migrations.questioner import NonInteractiveMigrationQuestioner +from django.utils import timezone import timezone_field @@ -14,6 +15,9 @@ from django_celery_beat.models import ( crontab_schedule_celery_timezone, SolarSchedule, + CrontabSchedule, + ClockedSchedule, + IntervalSchedule, ) @@ -74,6 +78,20 @@ def test_default_timezone_without_settings_config(self): def test_default_timezone_with_settings_config(self): assert crontab_schedule_celery_timezone() == self.FIRST_VALID_TIMEZONE + def test_duplicate_schedules(self): + # See: https://github.com/celery/django-celery-beat/issues/322 + # create 2 duplicates schedules + sched1 = CrontabSchedule.objects.create(hour="4") + sched2 = CrontabSchedule.objects.create(hour="4") + self.assertEqual(CrontabSchedule.objects.count(), 2) + # try to create a duplicate CrontabSchedule from a celery schedule + schedule = schedules.crontab(hour="4") + sched3 = CrontabSchedule.from_schedule(schedule) + # the schedule should be the first of the 2 previous duplicates + self.assertEqual(sched3, sched1) + # and the duplicates should not be deleted ! + self.assertEqual(CrontabSchedule.objects.count(), 2) + class SolarScheduleTestCase(TestCase): EVENT_CHOICES = SolarSchedule._meta.get_field("event").choices @@ -99,3 +117,43 @@ def test_celery_solar_schedules_included_as_event_choices(self): for event_choice in event_choices_values: assert event_choice in schedules.solar._all_events + + def test_duplicate_schedules(self): + # Duplicates cannot be tested for solar schedules because of the + # unique constraints in the SolarSchedule model + pass + + +class IntervalScheduleTestCase(TestCase): + + def test_duplicate_schedules(self): + # See: https://github.com/celery/django-celery-beat/issues/322 + kwargs = {'every': 1, 'period': IntervalSchedule.SECONDS} + # create 2 duplicates schedules + sched1 = IntervalSchedule.objects.create(**kwargs) + sched2 = IntervalSchedule.objects.create(**kwargs) + self.assertEqual(IntervalSchedule.objects.count(), 2) + # try to create a duplicate IntervalSchedule from a celery schedule + schedule = schedules.schedule(run_every=1.0) + sched3 = IntervalSchedule.from_schedule(schedule) + # the schedule should be the first of the 2 previous duplicates + self.assertEqual(sched3, sched1) + # and the duplicates should not be deleted ! + self.assertEqual(IntervalSchedule.objects.count(), 2) + + +class ClockedScheduleTestCase(TestCase): + + def test_duplicate_schedules(self): + # See: https://github.com/celery/django-celery-beat/issues/322 + d = timezone.now() + # create 2 duplicates schedules + sched1 = ClockedSchedule.objects.create(clocked_time=d) + sched2 = ClockedSchedule.objects.create(clocked_time=d) + self.assertEqual(ClockedSchedule.objects.count(), 2) + # try to create a duplicate ClockedSchedule from a previous schedule + sched3 = ClockedSchedule.from_schedule(sched2.schedule) + # the schedule should be the first of the 2 previous duplicates + self.assertEqual(sched3, sched1) + # and the duplicates should not be deleted ! + self.assertEqual(ClockedSchedule.objects.count(), 2) From b8ca709d539955313bc21a856e67e83743e11419 Mon Sep 17 00:00:00 2001 From: David Jean Louis Date: Tue, 2 Mar 2021 13:53:38 +0100 Subject: [PATCH 2/5] Fixed flake8 warnings --- t/unit/test_models.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/t/unit/test_models.py b/t/unit/test_models.py index c41092ab..11173389 100644 --- a/t/unit/test_models.py +++ b/t/unit/test_models.py @@ -82,7 +82,7 @@ def test_duplicate_schedules(self): # See: https://github.com/celery/django-celery-beat/issues/322 # create 2 duplicates schedules sched1 = CrontabSchedule.objects.create(hour="4") - sched2 = CrontabSchedule.objects.create(hour="4") + CrontabSchedule.objects.create(hour="4") self.assertEqual(CrontabSchedule.objects.count(), 2) # try to create a duplicate CrontabSchedule from a celery schedule schedule = schedules.crontab(hour="4") @@ -131,7 +131,7 @@ def test_duplicate_schedules(self): kwargs = {'every': 1, 'period': IntervalSchedule.SECONDS} # create 2 duplicates schedules sched1 = IntervalSchedule.objects.create(**kwargs) - sched2 = IntervalSchedule.objects.create(**kwargs) + IntervalSchedule.objects.create(**kwargs) self.assertEqual(IntervalSchedule.objects.count(), 2) # try to create a duplicate IntervalSchedule from a celery schedule schedule = schedules.schedule(run_every=1.0) From 1069c4eabd641b0beac3cd862634de24360b2eec Mon Sep 17 00:00:00 2001 From: David Jean Louis Date: Tue, 2 Mar 2021 18:23:08 +0100 Subject: [PATCH 3/5] More robust tests --- t/unit/test_models.py | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/t/unit/test_models.py b/t/unit/test_models.py index 11173389..bc532c05 100644 --- a/t/unit/test_models.py +++ b/t/unit/test_models.py @@ -80,17 +80,25 @@ def test_default_timezone_with_settings_config(self): def test_duplicate_schedules(self): # See: https://github.com/celery/django-celery-beat/issues/322 + kwargs = { + "minute": "*", + "hour": "4", + "day_of_week": "*", + "day_of_month": "*", + "month_of_year": "*", + "day_of_week": "*", + } # create 2 duplicates schedules - sched1 = CrontabSchedule.objects.create(hour="4") - CrontabSchedule.objects.create(hour="4") - self.assertEqual(CrontabSchedule.objects.count(), 2) + sched1 = CrontabSchedule.objects.create(**kwargs) + CrontabSchedule.objects.create(**kwargs) + self.assertEqual(CrontabSchedule.objects.filter(**kwargs).count(), 2) # try to create a duplicate CrontabSchedule from a celery schedule schedule = schedules.crontab(hour="4") sched3 = CrontabSchedule.from_schedule(schedule) # the schedule should be the first of the 2 previous duplicates self.assertEqual(sched3, sched1) # and the duplicates should not be deleted ! - self.assertEqual(CrontabSchedule.objects.count(), 2) + self.assertEqual(CrontabSchedule.objects.filter(**kwargs).count(), 2) class SolarScheduleTestCase(TestCase): @@ -132,14 +140,14 @@ def test_duplicate_schedules(self): # create 2 duplicates schedules sched1 = IntervalSchedule.objects.create(**kwargs) IntervalSchedule.objects.create(**kwargs) - self.assertEqual(IntervalSchedule.objects.count(), 2) + self.assertEqual(IntervalSchedule.objects.filter(**kwargs).count(), 2) # try to create a duplicate IntervalSchedule from a celery schedule schedule = schedules.schedule(run_every=1.0) sched3 = IntervalSchedule.from_schedule(schedule) # the schedule should be the first of the 2 previous duplicates self.assertEqual(sched3, sched1) # and the duplicates should not be deleted ! - self.assertEqual(IntervalSchedule.objects.count(), 2) + self.assertEqual(IntervalSchedule.objects.filter(**kwargs).count(), 2) class ClockedScheduleTestCase(TestCase): @@ -156,4 +164,6 @@ def test_duplicate_schedules(self): # the schedule should be the first of the 2 previous duplicates self.assertEqual(sched3, sched1) # and the duplicates should not be deleted ! - self.assertEqual(ClockedSchedule.objects.count(), 2) + self.assertEqual( + ClockedSchedule.objects.filter(clocked_time=d).count(), 2 + ) From 1c00ed56e669775ab99eab86260417cba417bad6 Mon Sep 17 00:00:00 2001 From: David Jean Louis Date: Tue, 2 Mar 2021 18:24:02 +0100 Subject: [PATCH 4/5] Removed MultipleObjectsReturned block - removed MultipleObjectsReturned code block for solar schedules as unique_togther constraint safely prevents duplicates - removed related test method --- django_celery_beat/models.py | 7 +++---- t/unit/test_models.py | 5 ----- 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/django_celery_beat/models.py b/django_celery_beat/models.py index 474dc4c4..583d8b63 100644 --- a/django_celery_beat/models.py +++ b/django_celery_beat/models.py @@ -117,14 +117,13 @@ def from_schedule(cls, schedule): spec = {'event': schedule.event, 'latitude': schedule.lat, 'longitude': schedule.lon} + + # we do not check for MultipleObjectsReturned exception here because + # the unique_together constraint safely prevents from duplicates try: return cls.objects.get(**spec) except cls.DoesNotExist: return cls(**spec) - except MultipleObjectsReturned: - # unique_together constraint should not permit reaching this code, - # but just in case, we return the first schedule found - return cls.objects.filter(**spec).first() def __str__(self): return '{0} ({1}, {2})'.format( diff --git a/t/unit/test_models.py b/t/unit/test_models.py index bc532c05..095fc32e 100644 --- a/t/unit/test_models.py +++ b/t/unit/test_models.py @@ -126,11 +126,6 @@ def test_celery_solar_schedules_included_as_event_choices(self): for event_choice in event_choices_values: assert event_choice in schedules.solar._all_events - def test_duplicate_schedules(self): - # Duplicates cannot be tested for solar schedules because of the - # unique constraints in the SolarSchedule model - pass - class IntervalScheduleTestCase(TestCase): From 8d846c81b5780b865661a5e96f6864a22a69e5db Mon Sep 17 00:00:00 2001 From: David Jean Louis Date: Tue, 2 Mar 2021 18:41:12 +0100 Subject: [PATCH 5/5] DRY --- t/unit/test_models.py | 60 ++++++++++++++++--------------------------- 1 file changed, 22 insertions(+), 38 deletions(-) diff --git a/t/unit/test_models.py b/t/unit/test_models.py index 095fc32e..627e47b5 100644 --- a/t/unit/test_models.py +++ b/t/unit/test_models.py @@ -67,7 +67,22 @@ def test_models_match_migrations(self): msg='Model changes exist that do not have a migration') -class CrontabScheduleTestCase(TestCase): +class TestDuplicatesMixin: + def _test_duplicate_schedules(self, cls, kwargs, schedule=None): + sched1 = cls.objects.create(**kwargs) + cls.objects.create(**kwargs) + self.assertEqual(cls.objects.filter(**kwargs).count(), 2) + # try to create a duplicate schedule from a celery schedule + if schedule is None: + schedule = sched1.schedule + sched3 = cls.from_schedule(schedule) + # the schedule should be the first of the 2 previous duplicates + self.assertEqual(sched3, sched1) + # and the duplicates should not be deleted ! + self.assertEqual(cls.objects.filter(**kwargs).count(), 2) + + +class CrontabScheduleTestCase(TestCase, TestDuplicatesMixin): FIRST_VALID_TIMEZONE = timezone_field.\ TimeZoneField.default_choices[0][0].zone @@ -88,17 +103,8 @@ def test_duplicate_schedules(self): "month_of_year": "*", "day_of_week": "*", } - # create 2 duplicates schedules - sched1 = CrontabSchedule.objects.create(**kwargs) - CrontabSchedule.objects.create(**kwargs) - self.assertEqual(CrontabSchedule.objects.filter(**kwargs).count(), 2) - # try to create a duplicate CrontabSchedule from a celery schedule schedule = schedules.crontab(hour="4") - sched3 = CrontabSchedule.from_schedule(schedule) - # the schedule should be the first of the 2 previous duplicates - self.assertEqual(sched3, sched1) - # and the duplicates should not be deleted ! - self.assertEqual(CrontabSchedule.objects.filter(**kwargs).count(), 2) + self._test_duplicate_schedules(CrontabSchedule, kwargs, schedule) class SolarScheduleTestCase(TestCase): @@ -127,38 +133,16 @@ def test_celery_solar_schedules_included_as_event_choices(self): assert event_choice in schedules.solar._all_events -class IntervalScheduleTestCase(TestCase): +class IntervalScheduleTestCase(TestCase, TestDuplicatesMixin): def test_duplicate_schedules(self): - # See: https://github.com/celery/django-celery-beat/issues/322 kwargs = {'every': 1, 'period': IntervalSchedule.SECONDS} - # create 2 duplicates schedules - sched1 = IntervalSchedule.objects.create(**kwargs) - IntervalSchedule.objects.create(**kwargs) - self.assertEqual(IntervalSchedule.objects.filter(**kwargs).count(), 2) - # try to create a duplicate IntervalSchedule from a celery schedule schedule = schedules.schedule(run_every=1.0) - sched3 = IntervalSchedule.from_schedule(schedule) - # the schedule should be the first of the 2 previous duplicates - self.assertEqual(sched3, sched1) - # and the duplicates should not be deleted ! - self.assertEqual(IntervalSchedule.objects.filter(**kwargs).count(), 2) + self._test_duplicate_schedules(IntervalSchedule, kwargs, schedule) -class ClockedScheduleTestCase(TestCase): +class ClockedScheduleTestCase(TestCase, TestDuplicatesMixin): def test_duplicate_schedules(self): - # See: https://github.com/celery/django-celery-beat/issues/322 - d = timezone.now() - # create 2 duplicates schedules - sched1 = ClockedSchedule.objects.create(clocked_time=d) - sched2 = ClockedSchedule.objects.create(clocked_time=d) - self.assertEqual(ClockedSchedule.objects.count(), 2) - # try to create a duplicate ClockedSchedule from a previous schedule - sched3 = ClockedSchedule.from_schedule(sched2.schedule) - # the schedule should be the first of the 2 previous duplicates - self.assertEqual(sched3, sched1) - # and the duplicates should not be deleted ! - self.assertEqual( - ClockedSchedule.objects.filter(clocked_time=d).count(), 2 - ) + kwargs = {'clocked_time': timezone.now()} + self._test_duplicate_schedules(ClockedSchedule, kwargs)