Merge lp:~cjwatson/launchpad/reduce-webhook-retries into lp:launchpad

Proposed by Colin Watson
Status: Merged
Merged at revision: 18924
Proposed branch: lp:~cjwatson/launchpad/reduce-webhook-retries
Merge into: lp:launchpad
Diff against target: 173 lines (+61/-20)
2 files modified
lib/lp/services/webhooks/model.py (+15/-2)
lib/lp/services/webhooks/tests/test_job.py (+46/-18)
To merge this branch: bzr merge lp:~cjwatson/launchpad/reduce-webhook-retries
Reviewer Review Type Date Requested Status
William Grant code Approve
Review via email: mp+365873@code.launchpad.net

Commit message

Retry webhook deliveries that respond with 4xx for an hour rather than a day.

Description of the change

These are generally permanent errors due to some kind of misconfiguration, and we don't want them clogging up our job queue.

To post a comment you must log in.
Revision history for this message
William Grant (wgrant) wrote :

I don't think it's completely reasonable to not retry 4xxs at all, but we could be less aggressive. It's not uncommon for something to 404 for a bit during a reconfiguration, for example.

Revision history for this message
Colin Watson (cjwatson) wrote :

To my mind that could just be handled using manual retries, but OK. How about this? I made it retry for up to an hour rather than for up to a day, which should still serve to ease queue depth.

Revision history for this message
William Grant (wgrant) :
review: Approve (code)

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'lib/lp/services/webhooks/model.py'
--- lib/lp/services/webhooks/model.py 2017-03-28 14:28:34 +0000
+++ lib/lp/services/webhooks/model.py 2019-04-13 08:58:38 +0000
@@ -1,4 +1,4 @@
1# Copyright 2015-2017 Canonical Ltd. This software is licensed under the1# Copyright 2015-2019 Canonical Ltd. This software is licensed under the
2# GNU Affero General Public License version 3 (see the file LICENSE).2# GNU Affero General Public License version 3 (see the file LICENSE).
33
4__metaclass__ = type4__metaclass__ = type
@@ -468,7 +468,20 @@
468468
469 @property469 @property
470 def retry_automatically(self):470 def retry_automatically(self):
471 return self._time_since_first_attempt < timedelta(days=1)471 if 'result' not in self.json_data:
472 return False
473 if self.json_data['result'].get('connection_error') is not None:
474 duration = timedelta(days=1)
475 else:
476 status_code = self.json_data['result']['response']['status_code']
477 if 500 <= status_code <= 599:
478 duration = timedelta(days=1)
479 else:
480 # Nominally a client error, but let's retry for a little
481 # while anyway since it's quite common for servers to return
482 # such errors for a short time during reconfigurations.
483 duration = timedelta(hours=1)
484 return self._time_since_first_attempt < duration
472485
473 @property486 @property
474 def retry_delay(self):487 def retry_delay(self):
475488
=== modified file 'lib/lp/services/webhooks/tests/test_job.py'
--- lib/lp/services/webhooks/tests/test_job.py 2018-05-31 10:23:03 +0000
+++ lib/lp/services/webhooks/tests/test_job.py 2019-04-13 08:58:38 +0000
@@ -1,4 +1,4 @@
1# Copyright 2015-2018 Canonical Ltd. This software is licensed under the1# Copyright 2015-2019 Canonical Ltd. This software is licensed under the
2# GNU Affero General Public License version 3 (see the file LICENSE).2# GNU Affero General Public License version 3 (see the file LICENSE).
33
4"""Tests for `WebhookJob`s."""4"""Tests for `WebhookJob`s."""
@@ -366,8 +366,8 @@
366 job,366 job,
367 MatchesStructure(367 MatchesStructure(
368 status=Equals(JobStatus.COMPLETED),368 status=Equals(JobStatus.COMPLETED),
369 pending=Equals(False),369 pending=Is(False),
370 successful=Equals(True),370 successful=Is(True),
371 date_sent=Not(Is(None)),371 date_sent=Not(Is(None)),
372 error_message=Is(None),372 error_message=Is(None),
373 json_data=ContainsDict(373 json_data=ContainsDict(
@@ -414,8 +414,8 @@
414 job,414 job,
415 MatchesStructure(415 MatchesStructure(
416 status=Equals(JobStatus.WAITING),416 status=Equals(JobStatus.WAITING),
417 pending=Equals(True),417 pending=Is(True),
418 successful=Equals(False),418 successful=Is(False),
419 date_sent=Not(Is(None)),419 date_sent=Not(Is(None)),
420 error_message=Equals('Bad HTTP response: 404'),420 error_message=Equals('Bad HTTP response: 404'),
421 json_data=ContainsDict(421 json_data=ContainsDict(
@@ -437,8 +437,8 @@
437 job,437 job,
438 MatchesStructure(438 MatchesStructure(
439 status=Equals(JobStatus.WAITING),439 status=Equals(JobStatus.WAITING),
440 pending=Equals(True),440 pending=Is(True),
441 successful=Equals(False),441 successful=Is(False),
442 date_sent=Not(Is(None)),442 date_sent=Not(Is(None)),
443 error_message=Equals('Connection error: Connection refused'),443 error_message=Equals('Connection error: Connection refused'),
444 json_data=ContainsDict(444 json_data=ContainsDict(
@@ -462,7 +462,7 @@
462 job,462 job,
463 MatchesStructure(463 MatchesStructure(
464 status=Equals(JobStatus.FAILED),464 status=Equals(JobStatus.FAILED),
465 pending=Equals(False),465 pending=Is(False),
466 successful=Is(None),466 successful=Is(None),
467 date_sent=Is(None),467 date_sent=Is(None),
468 error_message=Is(None),468 error_message=Is(None),
@@ -483,13 +483,13 @@
483 job,483 job,
484 MatchesStructure(484 MatchesStructure(
485 status=Equals(JobStatus.FAILED),485 status=Equals(JobStatus.FAILED),
486 pending=Equals(False),486 pending=Is(False),
487 successful=Equals(False),487 successful=Is(False),
488 date_sent=Is(None),488 date_sent=Is(None),
489 error_message=Equals('Webhook deactivated'),489 error_message=Equals('Webhook deactivated'),
490 json_data=ContainsDict(490 json_data=ContainsDict(
491 {'result': MatchesDict(491 {'result': MatchesDict(
492 {'webhook_deactivated': Equals(True)})})))492 {'webhook_deactivated': Is(True)})})))
493 self.assertEqual([], reqs)493 self.assertEqual([], reqs)
494 self.assertEqual([], oopses.oopses)494 self.assertEqual([], oopses.oopses)
495495
@@ -526,13 +526,41 @@
526 job.date_first_sent - timedelta(minutes=30)).isoformat()526 job.date_first_sent - timedelta(minutes=30)).isoformat()
527 self.assertEqual(timedelta(hours=1), job.retry_delay)527 self.assertEqual(timedelta(hours=1), job.retry_delay)
528528
529 def test_retry_automatically(self):529 def test_retry_automatically_connection_error(self):
530 # Deliveries are automatically retried until 24 hours after the530 # Deliveries that received a connection error are automatically
531 # initial attempt.531 # retried until 24 hours after the initial attempt.
532 job, reqs = self.makeAndRunJob(
533 raises=requests.ConnectionError('Connection refused'))
534 self.assertTrue(job.retry_automatically)
535 job.json_data['date_first_sent'] = (
536 job.date_first_sent - timedelta(hours=23)).isoformat()
537 self.assertTrue(job.retry_automatically)
538 job.json_data['date_first_sent'] = (
539 job.date_first_sent - timedelta(hours=24)).isoformat()
540 self.assertFalse(job.retry_automatically)
541
542 def test_retry_automatically_5xx(self):
543 # Deliveries that received a 5xx response are automatically retried
544 # until 24 hours after the initial attempt.
545 job, reqs = self.makeAndRunJob(response_status=503)
546 self.assertTrue(job.retry_automatically)
547 job.json_data['date_first_sent'] = (
548 job.date_first_sent - timedelta(hours=23)).isoformat()
549 self.assertTrue(job.retry_automatically)
550 job.json_data['date_first_sent'] = (
551 job.date_first_sent - timedelta(hours=24)).isoformat()
552 self.assertFalse(job.retry_automatically)
553
554 def test_retry_automatically_4xx(self):
555 # Deliveries that received a non-2xx/5xx response are automatically
556 # retried until 24 hours after the initial attempt.
532 job, reqs = self.makeAndRunJob(response_status=404)557 job, reqs = self.makeAndRunJob(response_status=404)
533 self.assertTrue(job.retry_automatically)558 self.assertTrue(job.retry_automatically)
534 job.json_data['date_first_sent'] = (559 job.json_data['date_first_sent'] = (
535 job.date_first_sent - timedelta(hours=24)).isoformat()560 job.date_first_sent - timedelta(minutes=59)).isoformat()
561 self.assertTrue(job.retry_automatically)
562 job.json_data['date_first_sent'] = (
563 job.date_first_sent - timedelta(hours=1)).isoformat()
536 self.assertFalse(job.retry_automatically)564 self.assertFalse(job.retry_automatically)
537565
538 def runJob(self, job):566 def runJob(self, job):
@@ -551,7 +579,7 @@
551 def test_automatic_retries(self):579 def test_automatic_retries(self):
552 hook = self.factory.makeWebhook()580 hook = self.factory.makeWebhook()
553 job = WebhookDeliveryJob.create(hook, 'test', payload={'foo': 'bar'})581 job = WebhookDeliveryJob.create(hook, 'test', payload={'foo': 'bar'})
554 client = MockWebhookClient(response_status=404)582 client = MockWebhookClient(response_status=503)
555 self.useFixture(ZopeUtilityFixture(client, IWebhookClient))583 self.useFixture(ZopeUtilityFixture(client, IWebhookClient))
556584
557 # The first attempt fails but schedules a retry five minutes later.585 # The first attempt fails but schedules a retry five minutes later.
@@ -585,7 +613,7 @@
585 def test_manual_retries(self):613 def test_manual_retries(self):
586 hook = self.factory.makeWebhook()614 hook = self.factory.makeWebhook()
587 job = WebhookDeliveryJob.create(hook, 'test', payload={'foo': 'bar'})615 job = WebhookDeliveryJob.create(hook, 'test', payload={'foo': 'bar'})
588 client = MockWebhookClient(response_status=404)616 client = MockWebhookClient(response_status=503)
589 self.useFixture(ZopeUtilityFixture(client, IWebhookClient))617 self.useFixture(ZopeUtilityFixture(client, IWebhookClient))
590618
591 # Simulate a first attempt failure.619 # Simulate a first attempt failure.
@@ -633,7 +661,7 @@
633 # systemic errors that erroneously failed many deliveries.661 # systemic errors that erroneously failed many deliveries.
634 hook = self.factory.makeWebhook()662 hook = self.factory.makeWebhook()
635 job = WebhookDeliveryJob.create(hook, 'test', payload={'foo': 'bar'})663 job = WebhookDeliveryJob.create(hook, 'test', payload={'foo': 'bar'})
636 client = MockWebhookClient(response_status=404)664 client = MockWebhookClient(response_status=503)
637 self.useFixture(ZopeUtilityFixture(client, IWebhookClient))665 self.useFixture(ZopeUtilityFixture(client, IWebhookClient))
638666
639 # Simulate a first attempt failure.667 # Simulate a first attempt failure.