Merge ~ltrager/maas:remote-script-runner-reliability into maas:master

Proposed by Lee Trager
Status: Merged
Approved by: Lee Trager
Approved revision: c9a6de66a77f0d6f961a4ece706c5448e1fdfaea
Merge reported by: MAAS Lander
Merged at revision: not available
Proposed branch: ~ltrager/maas:remote-script-runner-reliability
Merge into: maas:master
Diff against target: 135 lines (+13/-19)
5 files modified
src/metadataserver/user_data/templates/snippets/maas_run_remote_scripts.py (+2/-1)
src/provisioningserver/refresh/__init__.py (+2/-1)
src/provisioningserver/refresh/maas_api_helper.py (+4/-9)
src/provisioningserver/refresh/tests/test_maas_api_helper.py (+3/-6)
src/provisioningserver/refresh/tests/test_refresh.py (+2/-2)
Reviewer Review Type Date Requested Status
Andres Rodriguez (community) Approve
Review via email: mp+331995@code.launchpad.net

Commit message

LP: #1710092 - Use a monotonic clock instead of relative time.

Previously MAAS was using datetime.now() to calculate timeout. datetime.now()
may change use to NTP running or under high system load. time.monotonic()
gives a consistent incrementing time rate.

Description of the change

Mike started this branch[1] to fix some issues he was seeing. On my system this does make the stress-ng tests more reliable. However the test system does lock up if I run all tests including stress-ng-cpu-long and stress-ng-memory-long.

[1] https://code.launchpad.net/~mpontillo/maas/+git/maas/+merge/331030

To post a comment you must log in.
Revision history for this message
Andres Rodriguez (andreserl) wrote :

Let’s not revert the padding of time. Regardless of the improvements I
still think we should have better padding time!

On Sun, Oct 8, 2017 at 9:28 PM Lee Trager <email address hidden> wrote:

> The proposal to merge ~ltrager/maas:remote-script-runner-reliability into
> maas:master has been updated.
>
> Description changed to:
>
> Mike started this branch[1] to fix some issues he was seeing. On my system
> after switching to a monotonic clock the stress-ng tests no longer time
> out. I've reverted the 5 minutes padding back to 60 seconds as the cause
> seems to be the clock not the timeout.
>
> [1] https://code.launchpad.net/~mpontillo/maas/+git/maas/+merge/331030
>
> For more details, see:
> https://code.launchpad.net/~ltrager/maas/+git/maas/+merge/331995
> --
> Your team MAAS Committers is subscribed to branch maas:master.
>
--
Andres Rodriguez
Engineering Manager, MAAS
Canonical USA, Inc.

Revision history for this message
Andres Rodriguez (andreserl) wrote :

Please don’t revert the 5 min padding!

review: Needs Fixing
Revision history for this message
Andres Rodriguez (andreserl) wrote :

lgtm!

review: Approve
Revision history for this message
MAAS Lander (maas-lander) wrote :

LANDING
-b remote-script-runner-reliability lp:~ltrager/maas into -b master lp:~maas-committers/maas

STATUS: FAILED BUILD
LOG: http://maas-ci-jenkins.internal:8080/job/maas/job/branch-tester/455/consoleText

Revision history for this message
MAAS Lander (maas-lander) wrote :

LANDING
-b remote-script-runner-reliability lp:~ltrager/maas into -b master lp:~maas-committers/maas

STATUS: FAILED BUILD
LOG: http://maas-ci-jenkins.internal:8080/job/maas/job/branch-tester/456/consoleText

c9a6de6... by Lee Trager

Fix failing test

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1diff --git a/src/metadataserver/user_data/templates/snippets/maas_run_remote_scripts.py b/src/metadataserver/user_data/templates/snippets/maas_run_remote_scripts.py
2index 9dc5c24..5ff7ced 100644
3--- a/src/metadataserver/user_data/templates/snippets/maas_run_remote_scripts.py
4+++ b/src/metadataserver/user_data/templates/snippets/maas_run_remote_scripts.py
5@@ -29,6 +29,7 @@ import re
6 import shlex
7 from subprocess import (
8 check_output,
9+ DEVNULL,
10 PIPE,
11 Popen,
12 TimeoutExpired,
13@@ -91,7 +92,7 @@ def download_and_extract_tar(url, creds, scripts_dir):
14 def run_and_check(
15 cmd, combined_path, stdout_path, stderr_path, script_name, args,
16 ignore_error=False):
17- proc = Popen(cmd, stdout=PIPE, stderr=PIPE)
18+ proc = Popen(cmd, stdin=DEVNULL, stdout=PIPE, stderr=PIPE)
19 capture_script_output(proc, combined_path, stdout_path, stderr_path)
20 if proc.returncode != 0 and not ignore_error:
21 args['exit_status'] = proc.returncode
22diff --git a/src/provisioningserver/refresh/__init__.py b/src/provisioningserver/refresh/__init__.py
23index bf05f10..cd52816 100644
24--- a/src/provisioningserver/refresh/__init__.py
25+++ b/src/provisioningserver/refresh/__init__.py
26@@ -7,6 +7,7 @@ import os
27 import socket
28 import stat
29 from subprocess import (
30+ DEVNULL,
31 PIPE,
32 Popen,
33 TimeoutExpired,
34@@ -164,7 +165,7 @@ def runscripts(scripts, url, creds, tmpdir):
35 timeout_seconds = timeout.seconds
36
37 try:
38- proc = Popen(script_path, stdout=PIPE, stderr=PIPE)
39+ proc = Popen(script_path, stdin=DEVNULL, stdout=PIPE, stderr=PIPE)
40 capture_script_output(
41 proc, combined_path, stdout_path, stderr_path, timeout_seconds)
42 except OSError as e:
43diff --git a/src/provisioningserver/refresh/maas_api_helper.py b/src/provisioningserver/refresh/maas_api_helper.py
44index ab80934..e2826f6 100644
45--- a/src/provisioningserver/refresh/maas_api_helper.py
46+++ b/src/provisioningserver/refresh/maas_api_helper.py
47@@ -10,10 +10,6 @@ __all__ = [
48 ]
49
50 from collections import OrderedDict
51-from datetime import (
52- datetime,
53- timedelta,
54-)
55 from email.utils import parsedate
56 import json
57 import mimetypes
58@@ -295,8 +291,7 @@ def capture_script_output(
59 timeout = None
60 else:
61 # Pad the timeout by 5 minutes to allow for cleanup.
62- timeout = datetime.now() + timedelta(
63- minutes=5, seconds=timeout_seconds)
64+ timeout = time.monotonic() + timeout_seconds + (60 * 5)
65
66 # Create the file and then open it in read write mode for terminal
67 # emulation.
68@@ -310,14 +305,14 @@ def capture_script_output(
69 while selector.get_map() and proc.poll() is None:
70 # Select with a short timeout so that we don't tight loop.
71 _select_script_output(selector, combined, 0.1)
72- if timeout is not None and datetime.now() > timeout:
73+ if timeout is not None and time.monotonic() > timeout:
74 break
75 else:
76 # Process has finished or has closed stdout and stderr.
77 # Process anything still sitting in the latter's buffers.
78 _select_script_output(selector, combined, 0.0)
79
80- now = datetime.now()
81+ now = time.monotonic()
82 # Wait for the process to finish.
83 if timeout is None:
84 # No timeout just wait until the process finishes.
85@@ -330,7 +325,7 @@ def capture_script_output(
86 # stdout and stderr have been closed but the timeout has not been
87 # exceeded. Run with the remaining amount of time.
88 try:
89- return proc.wait(timeout=(timeout - now).seconds)
90+ return proc.wait(timeout=(timeout - now))
91 except TimeoutExpired:
92 # Make sure the process was killed
93 proc.kill()
94diff --git a/src/provisioningserver/refresh/tests/test_maas_api_helper.py b/src/provisioningserver/refresh/tests/test_maas_api_helper.py
95index 3bf9b83..72f6341 100644
96--- a/src/provisioningserver/refresh/tests/test_maas_api_helper.py
97+++ b/src/provisioningserver/refresh/tests/test_maas_api_helper.py
98@@ -6,7 +6,6 @@
99 __all__ = []
100
101 from collections import OrderedDict
102-from datetime import timedelta
103 from email.utils import formatdate
104 from io import StringIO
105 import json
106@@ -615,11 +614,9 @@ class TestCaptureScriptOutput(MAASTestCase):
107 )))
108
109 def test__timeout(self):
110- # The timeout is padded by 60 seconds, override it so the test can
111- # run quickly.
112- self.patch(maas_api_helper, 'timedelta').return_value = timedelta(
113- microseconds=1)
114+ self.patch(maas_api_helper.time, 'monotonic').side_effect = (
115+ 0, 60 * 6, 60 * 6, 60 * 6)
116 proc = Popen(
117 'echo "test"', stdout=PIPE, stderr=PIPE, shell=True)
118 self.assertRaises(
119- TimeoutExpired, self.capture, proc, random.randint(1, 500))
120+ TimeoutExpired, self.capture, proc, random.randint(1, 5))
121diff --git a/src/provisioningserver/refresh/tests/test_refresh.py b/src/provisioningserver/refresh/tests/test_refresh.py
122index d03cac1..2c5273a 100644
123--- a/src/provisioningserver/refresh/tests/test_refresh.py
124+++ b/src/provisioningserver/refresh/tests/test_refresh.py
125@@ -341,8 +341,8 @@ class TestRefresh(MAASTestCase):
126 script_name = factory.make_name('script_name')
127 timeout = timedelta(seconds=random.randint(1, 500))
128 self.patch_scripts_failure(script_name, timeout)
129- self.patch(refresh.maas_api_helper, 'timedelta').return_value = (
130- timedelta(microseconds=1))
131+ self.patch(refresh.maas_api_helper.time, 'monotonic').side_effect = (
132+ 0, timeout.seconds + (60 * 6), timeout.seconds + (60 * 6))
133
134 system_id = factory.make_name('system_id')
135 consumer_key = factory.make_name('consumer_key')

Subscribers

People subscribed via source and target branches