Merge ~ltrager/maas:remote-script-runner-reliability into maas:master

Proposed by Lee Trager
Status: Merged
Approved by: Lee Trager
Approved revision: c9a6de66a77f0d6f961a4ece706c5448e1fdfaea
Merge reported by: MAAS Lander
Merged at revision: not available
Proposed branch: ~ltrager/maas:remote-script-runner-reliability
Merge into: maas:master
Diff against target: 135 lines (+13/-19)
5 files modified
src/metadataserver/user_data/templates/snippets/maas_run_remote_scripts.py (+2/-1)
src/provisioningserver/refresh/__init__.py (+2/-1)
src/provisioningserver/refresh/maas_api_helper.py (+4/-9)
src/provisioningserver/refresh/tests/test_maas_api_helper.py (+3/-6)
src/provisioningserver/refresh/tests/test_refresh.py (+2/-2)
Reviewer Review Type Date Requested Status
Andres Rodriguez (community) Approve
Review via email: mp+331995@code.launchpad.net

Commit message

LP: #1710092 - Use a monotonic clock instead of relative time.

Previously MAAS was using datetime.now() to calculate timeout. datetime.now()
may change use to NTP running or under high system load. time.monotonic()
gives a consistent incrementing time rate.

Description of the change

Mike started this branch[1] to fix some issues he was seeing. On my system this does make the stress-ng tests more reliable. However the test system does lock up if I run all tests including stress-ng-cpu-long and stress-ng-memory-long.

[1] https://code.launchpad.net/~mpontillo/maas/+git/maas/+merge/331030

To post a comment you must log in.
Revision history for this message
Andres Rodriguez (andreserl) wrote :

Let’s not revert the padding of time. Regardless of the improvements I
still think we should have better padding time!

On Sun, Oct 8, 2017 at 9:28 PM Lee Trager <email address hidden> wrote:

> The proposal to merge ~ltrager/maas:remote-script-runner-reliability into
> maas:master has been updated.
>
> Description changed to:
>
> Mike started this branch[1] to fix some issues he was seeing. On my system
> after switching to a monotonic clock the stress-ng tests no longer time
> out. I've reverted the 5 minutes padding back to 60 seconds as the cause
> seems to be the clock not the timeout.
>
> [1] https://code.launchpad.net/~mpontillo/maas/+git/maas/+merge/331030
>
> For more details, see:
> https://code.launchpad.net/~ltrager/maas/+git/maas/+merge/331995
> --
> Your team MAAS Committers is subscribed to branch maas:master.
>
--
Andres Rodriguez
Engineering Manager, MAAS
Canonical USA, Inc.

Revision history for this message
Andres Rodriguez (andreserl) wrote :

Please don’t revert the 5 min padding!

review: Needs Fixing
Revision history for this message
Andres Rodriguez (andreserl) wrote :

lgtm!

review: Approve
Revision history for this message
MAAS Lander (maas-lander) wrote :

LANDING
-b remote-script-runner-reliability lp:~ltrager/maas into -b master lp:~maas-committers/maas

STATUS: FAILED BUILD
LOG: http://maas-ci-jenkins.internal:8080/job/maas/job/branch-tester/455/consoleText

Revision history for this message
MAAS Lander (maas-lander) wrote :

LANDING
-b remote-script-runner-reliability lp:~ltrager/maas into -b master lp:~maas-committers/maas

STATUS: FAILED BUILD
LOG: http://maas-ci-jenkins.internal:8080/job/maas/job/branch-tester/456/consoleText

c9a6de6... by Lee Trager

Fix failing test

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
diff --git a/src/metadataserver/user_data/templates/snippets/maas_run_remote_scripts.py b/src/metadataserver/user_data/templates/snippets/maas_run_remote_scripts.py
index 9dc5c24..5ff7ced 100644
--- a/src/metadataserver/user_data/templates/snippets/maas_run_remote_scripts.py
+++ b/src/metadataserver/user_data/templates/snippets/maas_run_remote_scripts.py
@@ -29,6 +29,7 @@ import re
29import shlex29import shlex
30from subprocess import (30from subprocess import (
31 check_output,31 check_output,
32 DEVNULL,
32 PIPE,33 PIPE,
33 Popen,34 Popen,
34 TimeoutExpired,35 TimeoutExpired,
@@ -91,7 +92,7 @@ def download_and_extract_tar(url, creds, scripts_dir):
91def run_and_check(92def run_and_check(
92 cmd, combined_path, stdout_path, stderr_path, script_name, args,93 cmd, combined_path, stdout_path, stderr_path, script_name, args,
93 ignore_error=False):94 ignore_error=False):
94 proc = Popen(cmd, stdout=PIPE, stderr=PIPE)95 proc = Popen(cmd, stdin=DEVNULL, stdout=PIPE, stderr=PIPE)
95 capture_script_output(proc, combined_path, stdout_path, stderr_path)96 capture_script_output(proc, combined_path, stdout_path, stderr_path)
96 if proc.returncode != 0 and not ignore_error:97 if proc.returncode != 0 and not ignore_error:
97 args['exit_status'] = proc.returncode98 args['exit_status'] = proc.returncode
diff --git a/src/provisioningserver/refresh/__init__.py b/src/provisioningserver/refresh/__init__.py
index bf05f10..cd52816 100644
--- a/src/provisioningserver/refresh/__init__.py
+++ b/src/provisioningserver/refresh/__init__.py
@@ -7,6 +7,7 @@ import os
7import socket7import socket
8import stat8import stat
9from subprocess import (9from subprocess import (
10 DEVNULL,
10 PIPE,11 PIPE,
11 Popen,12 Popen,
12 TimeoutExpired,13 TimeoutExpired,
@@ -164,7 +165,7 @@ def runscripts(scripts, url, creds, tmpdir):
164 timeout_seconds = timeout.seconds165 timeout_seconds = timeout.seconds
165166
166 try:167 try:
167 proc = Popen(script_path, stdout=PIPE, stderr=PIPE)168 proc = Popen(script_path, stdin=DEVNULL, stdout=PIPE, stderr=PIPE)
168 capture_script_output(169 capture_script_output(
169 proc, combined_path, stdout_path, stderr_path, timeout_seconds)170 proc, combined_path, stdout_path, stderr_path, timeout_seconds)
170 except OSError as e:171 except OSError as e:
diff --git a/src/provisioningserver/refresh/maas_api_helper.py b/src/provisioningserver/refresh/maas_api_helper.py
index ab80934..e2826f6 100644
--- a/src/provisioningserver/refresh/maas_api_helper.py
+++ b/src/provisioningserver/refresh/maas_api_helper.py
@@ -10,10 +10,6 @@ __all__ = [
10 ]10 ]
1111
12from collections import OrderedDict12from collections import OrderedDict
13from datetime import (
14 datetime,
15 timedelta,
16)
17from email.utils import parsedate13from email.utils import parsedate
18import json14import json
19import mimetypes15import mimetypes
@@ -295,8 +291,7 @@ def capture_script_output(
295 timeout = None291 timeout = None
296 else:292 else:
297 # Pad the timeout by 5 minutes to allow for cleanup.293 # Pad the timeout by 5 minutes to allow for cleanup.
298 timeout = datetime.now() + timedelta(294 timeout = time.monotonic() + timeout_seconds + (60 * 5)
299 minutes=5, seconds=timeout_seconds)
300295
301 # Create the file and then open it in read write mode for terminal296 # Create the file and then open it in read write mode for terminal
302 # emulation.297 # emulation.
@@ -310,14 +305,14 @@ def capture_script_output(
310 while selector.get_map() and proc.poll() is None:305 while selector.get_map() and proc.poll() is None:
311 # Select with a short timeout so that we don't tight loop.306 # Select with a short timeout so that we don't tight loop.
312 _select_script_output(selector, combined, 0.1)307 _select_script_output(selector, combined, 0.1)
313 if timeout is not None and datetime.now() > timeout:308 if timeout is not None and time.monotonic() > timeout:
314 break309 break
315 else:310 else:
316 # Process has finished or has closed stdout and stderr.311 # Process has finished or has closed stdout and stderr.
317 # Process anything still sitting in the latter's buffers.312 # Process anything still sitting in the latter's buffers.
318 _select_script_output(selector, combined, 0.0)313 _select_script_output(selector, combined, 0.0)
319314
320 now = datetime.now()315 now = time.monotonic()
321 # Wait for the process to finish.316 # Wait for the process to finish.
322 if timeout is None:317 if timeout is None:
323 # No timeout just wait until the process finishes.318 # No timeout just wait until the process finishes.
@@ -330,7 +325,7 @@ def capture_script_output(
330 # stdout and stderr have been closed but the timeout has not been325 # stdout and stderr have been closed but the timeout has not been
331 # exceeded. Run with the remaining amount of time.326 # exceeded. Run with the remaining amount of time.
332 try:327 try:
333 return proc.wait(timeout=(timeout - now).seconds)328 return proc.wait(timeout=(timeout - now))
334 except TimeoutExpired:329 except TimeoutExpired:
335 # Make sure the process was killed330 # Make sure the process was killed
336 proc.kill()331 proc.kill()
diff --git a/src/provisioningserver/refresh/tests/test_maas_api_helper.py b/src/provisioningserver/refresh/tests/test_maas_api_helper.py
index 3bf9b83..72f6341 100644
--- a/src/provisioningserver/refresh/tests/test_maas_api_helper.py
+++ b/src/provisioningserver/refresh/tests/test_maas_api_helper.py
@@ -6,7 +6,6 @@
6__all__ = []6__all__ = []
77
8from collections import OrderedDict8from collections import OrderedDict
9from datetime import timedelta
10from email.utils import formatdate9from email.utils import formatdate
11from io import StringIO10from io import StringIO
12import json11import json
@@ -615,11 +614,9 @@ class TestCaptureScriptOutput(MAASTestCase):
615 )))614 )))
616615
617 def test__timeout(self):616 def test__timeout(self):
618 # The timeout is padded by 60 seconds, override it so the test can617 self.patch(maas_api_helper.time, 'monotonic').side_effect = (
619 # run quickly.618 0, 60 * 6, 60 * 6, 60 * 6)
620 self.patch(maas_api_helper, 'timedelta').return_value = timedelta(
621 microseconds=1)
622 proc = Popen(619 proc = Popen(
623 'echo "test"', stdout=PIPE, stderr=PIPE, shell=True)620 'echo "test"', stdout=PIPE, stderr=PIPE, shell=True)
624 self.assertRaises(621 self.assertRaises(
625 TimeoutExpired, self.capture, proc, random.randint(1, 500))622 TimeoutExpired, self.capture, proc, random.randint(1, 5))
diff --git a/src/provisioningserver/refresh/tests/test_refresh.py b/src/provisioningserver/refresh/tests/test_refresh.py
index d03cac1..2c5273a 100644
--- a/src/provisioningserver/refresh/tests/test_refresh.py
+++ b/src/provisioningserver/refresh/tests/test_refresh.py
@@ -341,8 +341,8 @@ class TestRefresh(MAASTestCase):
341 script_name = factory.make_name('script_name')341 script_name = factory.make_name('script_name')
342 timeout = timedelta(seconds=random.randint(1, 500))342 timeout = timedelta(seconds=random.randint(1, 500))
343 self.patch_scripts_failure(script_name, timeout)343 self.patch_scripts_failure(script_name, timeout)
344 self.patch(refresh.maas_api_helper, 'timedelta').return_value = (344 self.patch(refresh.maas_api_helper.time, 'monotonic').side_effect = (
345 timedelta(microseconds=1))345 0, timeout.seconds + (60 * 6), timeout.seconds + (60 * 6))
346346
347 system_id = factory.make_name('system_id')347 system_id = factory.make_name('system_id')
348 consumer_key = factory.make_name('consumer_key')348 consumer_key = factory.make_name('consumer_key')

Subscribers

People subscribed via source and target branches