Merge lp:~jtv/maas/1.5-bug-1354511 into lp:maas/1.5

Proposed by Jeroen T. Vermeulen
Status: Merged
Approved by: Jeroen T. Vermeulen
Approved revision: no longer in the source branch.
Merged at revision: 2296
Proposed branch: lp:~jtv/maas/1.5-bug-1354511
Merge into: lp:maas/1.5
Diff against target: 162 lines (+76/-21)
2 files modified
src/maasserver/api.py (+28/-9)
src/maasserver/tests/test_api_commissioning.py (+48/-12)
To merge this branch: bzr merge lp:~jtv/maas/1.5-bug-1354511
Reviewer Review Type Date Requested Status
Jeroen T. Vermeulen (community) Approve
Review via email: mp+230177@code.launchpad.net

Commit message

Backport trunk r2668: When timing out nodes that haven't finished commissioning, compare the last-update timestamp to the database clock (from which it was also generated), not to the Python-side clock. Timezone skew between the two could cause nodes to time out immediately, or hours late.

Description of the change

Straight backport. For self-approval.

Jeroen

To post a comment you must log in.
Revision history for this message
Jeroen T. Vermeulen (jtv) wrote :

Self-approving.

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'src/maasserver/api.py'
--- src/maasserver/api.py 2014-07-24 00:09:02 +0000
+++ src/maasserver/api.py 2014-08-08 20:17:35 +0000
@@ -97,10 +97,6 @@
97 b64encode,97 b64encode,
98 )98 )
99from cStringIO import StringIO99from cStringIO import StringIO
100from datetime import (
101 datetime,
102 timedelta,
103 )
104from functools import partial100from functools import partial
105import httplib101import httplib
106from inspect import getdoc102from inspect import getdoc
@@ -704,12 +700,35 @@
704 Anything that has been commissioning for longer than700 Anything that has been commissioning for longer than
705 settings.COMMISSIONING_TIMEOUT is moved into the FAILED_TESTS status.701 settings.COMMISSIONING_TIMEOUT is moved into the FAILED_TESTS status.
706 """702 """
707 interval = timedelta(minutes=settings.COMMISSIONING_TIMEOUT)703 # Compute the cutoff time on the database, using the database's
708 cutoff = datetime.now() - interval704 # clock to compare to the "updated" timestamp, also set from the
709 query = Node.objects.filter(705 # database's clock. Otherwise, a sufficient difference between the
710 status=NODE_STATUS.COMMISSIONING, updated__lte=cutoff)706 # two clocks (including timezone offset!) might cause commissioning to
707 # "time out" immediately, or hours late.
708 #
709 # This timeout relies on nothing else updating the commissioning node
710 # within the hour. Otherwise, the timestamp will be refreshed as a
711 # side effect and timeout will be postponed.
712 #
713 # This query both identifies and updates the failed nodes. It
714 # refreshes the "updated" timestamp, but does not run any Django-side
715 # code associated with saving the nodes.
716 params = {
717 'commissioning': NODE_STATUS.COMMISSIONING,
718 'failed_tests': NODE_STATUS.FAILED_TESTS,
719 'minutes': settings.COMMISSIONING_TIMEOUT
720 }
721 query = Node.objects.raw("""
722 UPDATE maasserver_node
723 SET
724 status = %(failed_tests)s,
725 updated = now()
726 WHERE
727 status = %(commissioning)s AND
728 updated <= (now() - interval '%(minutes)f minutes')
729 RETURNING *
730 """ % params)
711 results = list(query)731 results = list(query)
712 query.update(status=NODE_STATUS.FAILED_TESTS)
713 # Note that Django doesn't call save() on updated nodes here,732 # Note that Django doesn't call save() on updated nodes here,
714 # but I don't think anything requires its effects anyway.733 # but I don't think anything requires its effects anyway.
715 return results734 return results
716735
=== modified file 'src/maasserver/tests/test_api_commissioning.py'
--- src/maasserver/tests/test_api_commissioning.py 2014-02-01 04:04:37 +0000
+++ src/maasserver/tests/test_api_commissioning.py 2014-08-08 20:17:35 +0000
@@ -15,15 +15,12 @@
15__all__ = []15__all__ = []
1616
17from base64 import b64encode17from base64 import b64encode
18from datetime import (
19 datetime,
20 timedelta,
21 )
22import httplib18import httplib
23import json19import json
2420
25from django.conf import settings21from django.conf import settings
26from django.core.urlresolvers import reverse22from django.core.urlresolvers import reverse
23from django.db import connection
27from maasserver.enum import NODE_STATUS24from maasserver.enum import NODE_STATUS
28from maasserver.testing import reload_object25from maasserver.testing import reload_object
29from maasserver.testing.api import APITestCase26from maasserver.testing.api import APITestCase
@@ -36,9 +33,22 @@
36class TestCommissioningTimeout(MAASServerTestCase):33class TestCommissioningTimeout(MAASServerTestCase):
37 """Testing of commissioning timeout API."""34 """Testing of commissioning timeout API."""
3835
36 def age_node(self, node, minutes, cursor=None):
37 """Set back the update/creation timestamps on `node` by `minutes`."""
38 if cursor is None:
39 cursor = connection.cursor()
40 cursor.execute("""
41 UPDATE maasserver_node
42 SET
43 created = created - interval '%f minutes',
44 updated = updated - interval '%f minutes'
45 WHERE id = %d
46 """ % (minutes, minutes, node.id))
47
39 def test_check_with_no_action(self):48 def test_check_with_no_action(self):
40 self.client_log_in()49 self.client_log_in()
41 node = factory.make_node(status=NODE_STATUS.READY)50 node = factory.make_node(status=NODE_STATUS.READY)
51 self.age_node(node, settings.COMMISSIONING_TIMEOUT + 100)
42 response = self.client.post(52 response = self.client.post(
43 reverse('nodes_handler'), {'op': 'check_commissioning'})53 reverse('nodes_handler'), {'op': 'check_commissioning'})
44 # Anything that's not commissioning should be ignored.54 # Anything that's not commissioning should be ignored.
@@ -49,8 +59,8 @@
4959
50 def test_check_with_commissioning_but_not_expired_node(self):60 def test_check_with_commissioning_but_not_expired_node(self):
51 self.client_log_in()61 self.client_log_in()
52 node = factory.make_node(62 node = factory.make_node(status=NODE_STATUS.COMMISSIONING)
53 status=NODE_STATUS.COMMISSIONING)63 self.age_node(node, settings.COMMISSIONING_TIMEOUT - 1)
54 response = self.client.post(64 response = self.client.post(
55 reverse('nodes_handler'), {'op': 'check_commissioning'})65 reverse('nodes_handler'), {'op': 'check_commissioning'})
56 node = reload_object(node)66 node = reload_object(node)
@@ -60,12 +70,8 @@
6070
61 def test_check_with_commissioning_and_expired_node(self):71 def test_check_with_commissioning_and_expired_node(self):
62 self.client_log_in()72 self.client_log_in()
63 # Have an interval 1 second longer than the timeout.73 node = factory.make_node(status=NODE_STATUS.COMMISSIONING)
64 interval = timedelta(seconds=1, minutes=settings.COMMISSIONING_TIMEOUT)74 self.age_node(node, settings.COMMISSIONING_TIMEOUT + 1)
65 updated_at = datetime.now() - interval
66 node = factory.make_node(
67 status=NODE_STATUS.COMMISSIONING, created=datetime.now(),
68 updated=updated_at)
6975
70 response = self.client.post(76 response = self.client.post(
71 reverse('nodes_handler'), {'op': 'check_commissioning'})77 reverse('nodes_handler'), {'op': 'check_commissioning'})
@@ -82,6 +88,36 @@
82 for response_node in json.loads(response.content)],88 for response_node in json.loads(response.content)],
83 ))89 ))
8490
91 def test_check_ignores_timezone_skew_between_python_and_database(self):
92 cursor = connection.cursor()
93 # Set time zone, for the duration of the ongoing transaction.
94 cursor.execute("SET LOCAL TIME ZONE +13")
95 self.client_log_in()
96 late_node = factory.make_node(status=NODE_STATUS.COMMISSIONING)
97 self.age_node(
98 late_node, settings.COMMISSIONING_TIMEOUT + 1, cursor=cursor)
99 timely_node = factory.make_node(status=NODE_STATUS.COMMISSIONING)
100 self.age_node(
101 timely_node, settings.COMMISSIONING_TIMEOUT - 1, cursor=cursor)
102
103 response = self.client.post(
104 reverse('nodes_handler'), {'op': 'check_commissioning'})
105 self.assertEqual(
106 (httplib.OK, [late_node.system_id]),
107 (
108 response.status_code,
109 [
110 response_node['system_id']
111 for response_node in json.loads(response.content)
112 ],
113 ))
114 self.assertEqual(
115 NODE_STATUS.FAILED_TESTS,
116 reload_object(late_node).status)
117 self.assertEqual(
118 NODE_STATUS.COMMISSIONING,
119 reload_object(timely_node).status)
120
85121
86class AdminCommissioningScriptsAPITest(MAASServerTestCase):122class AdminCommissioningScriptsAPITest(MAASServerTestCase):
87 """Tests for `CommissioningScriptsHandler`."""123 """Tests for `CommissioningScriptsHandler`."""

Subscribers

People subscribed via source and target branches

to all changes: