Merge ~sylvain-pineau/checkbox-ng:mem-improv-1749099 into checkbox-ng:master

Proposed by Sylvain Pineau
Status: Merged
Approved by: Sylvain Pineau
Approved revision: 05dfbf8faa7d836d4e553270f70fca3bff7c306d
Merged at revision: 86f0249dda663dd7ea81bf6b6eff23b393f0e234
Proposed branch: ~sylvain-pineau/checkbox-ng:mem-improv-1749099
Merge into: checkbox-ng:master
Diff against target: 151 lines (+35/-59)
3 files modified
plainbox/impl/exporter/tar.py (+32/-56)
plainbox/impl/exporter/xlsx.py (+1/-1)
plainbox/impl/session/assistant.py (+2/-2)
Reviewer Review Type Date Requested Status
Maciej Kisielewski (community) Approve
Review via email: mp+337945@code.launchpad.net

Description of the change

This MR aims to reduce the memory usage when dealing with reports generation.

using the following 3 techniques:

- xlsx constructor accepts a constant_memory option (See http://xlsxwriter.readthedocs.io/workbook.html?highlight=constant_memory)
- all reports are using SpooledTemporaryFile instead of io.BytesIO to save to disk when files exceed 100K.
- avoid LZMA compression on systems with less than 1GiB of memory.

Submission to C3 still works (https://certification.canonical.com/hardware/201202-10584/submission/127522/)

How to test:

- Tweak tar.py from this branch and adjust the line "if mem_mib < 1200:" to be "if mem_mib < 100200:" (should work except if you have 100G of RAM...)
- Run ulimit -Sv 230000 (you may have to adjust it if checkbox cannot run bootstrap jobs/extcmd)
- Run canonical-certification-cli and just the Misc and Informational categories of 16.04 cert test plan.

To post a comment you must log in.
Revision history for this message
Maciej Kisielewski (kissiel) wrote :

One nitpick inline.
Overall great stuff, +1!

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1diff --git a/plainbox/impl/exporter/tar.py b/plainbox/impl/exporter/tar.py
2index 12d549a..72e1ff0 100644
3--- a/plainbox/impl/exporter/tar.py
4+++ b/plainbox/impl/exporter/tar.py
5@@ -25,10 +25,10 @@
6 THIS MODULE DOES NOT HAVE STABLE PUBLIC API
7 """
8
9-import io
10 import os
11 import tarfile
12 import time
13+from tempfile import SpooledTemporaryFile
14
15 from plainbox.impl.exporter import SessionStateExporterBase
16 from plainbox.impl.exporter.jinja2 import Jinja2SessionStateExporter
17@@ -53,63 +53,39 @@ class TARSessionStateExporter(SessionStateExporterBase):
18 Byte stream to write to.
19
20 """
21- html_stream = io.BytesIO()
22- options_list = [
23- SessionStateExporterBase.OPTION_WITH_COMMENTS,
24- SessionStateExporterBase.OPTION_WITH_IO_LOG,
25- SessionStateExporterBase.OPTION_FLATTEN_IO_LOG,
26- SessionStateExporterBase.OPTION_WITH_JOB_DEFS,
27- SessionStateExporterBase.OPTION_WITH_RESOURCE_MAP,
28- SessionStateExporterBase.OPTION_WITH_CATEGORY_MAP,
29- SessionStateExporterBase.OPTION_WITH_CERTIFICATION_STATUS
30- ]
31- exporter_unit = self._get_all_exporter_units()[
32- 'com.canonical.plainbox::html']
33- html_exporter = Jinja2SessionStateExporter(exporter_unit=exporter_unit)
34- html_exporter.dump_from_session_manager(manager, html_stream)
35- html_tarinfo = tarfile.TarInfo(name="submission.html")
36- html_tarinfo.size = html_stream.tell()
37- html_tarinfo.mtime = time.time()
38- html_stream.seek(0) # Need to rewind the file, puagh
39-
40- json_stream = io.BytesIO()
41- options_list = [
42- SessionStateExporterBase.OPTION_WITH_COMMENTS,
43- SessionStateExporterBase.OPTION_WITH_IO_LOG,
44- SessionStateExporterBase.OPTION_FLATTEN_IO_LOG,
45- SessionStateExporterBase.OPTION_WITH_JOB_DEFS,
46- SessionStateExporterBase.OPTION_WITH_RESOURCE_MAP,
47- SessionStateExporterBase.OPTION_WITH_CATEGORY_MAP,
48- SessionStateExporterBase.OPTION_WITH_CERTIFICATION_STATUS
49- ]
50- exporter_unit = self._get_all_exporter_units()[
51- 'com.canonical.plainbox::json']
52- json_exporter = Jinja2SessionStateExporter(exporter_unit=exporter_unit)
53- json_exporter.dump_from_session_manager(manager, json_stream)
54- json_tarinfo = tarfile.TarInfo(name="submission.json")
55- json_tarinfo.size = json_stream.tell()
56- json_tarinfo.mtime = time.time()
57- json_stream.seek(0) # Need to rewind the file, puagh
58-
59- xlsx_stream = io.BytesIO()
60- options_list = [
61- XLSXSessionStateExporter.OPTION_WITH_SYSTEM_INFO,
62- XLSXSessionStateExporter.OPTION_WITH_SUMMARY,
63- XLSXSessionStateExporter.OPTION_WITH_DESCRIPTION,
64- XLSXSessionStateExporter.OPTION_WITH_TEXT_ATTACHMENTS,
65- ]
66- xlsx_exporter = XLSXSessionStateExporter(options_list)
67- xlsx_exporter.dump_from_session_manager(manager, xlsx_stream)
68- xlsx_tarinfo = tarfile.TarInfo(name="submission.xlsx")
69- xlsx_tarinfo.size = xlsx_stream.tell()
70- xlsx_tarinfo.mtime = time.time()
71- xlsx_stream.seek(0) # Need to rewind the file, puagh
72+ preset = None
73+ mem_bytes = os.sysconf('SC_PAGE_SIZE') * os.sysconf('SC_PHYS_PAGES')
74+ mem_mib = mem_bytes/(1024.**2)
75+ # On systems with less than 1GiB of RAM, create the submission tarball
76+ # without any compression level (i.e preset=0).
77+ # See https://docs.python.org/3/library/lzma.html
78+ # With preset 9 for example, the overhead for an LZMACompressor object
79+ # can be as high as 800 MiB.
80+ if mem_mib < 1200:
81+ preset = 0
82
83 job_state_map = manager.default_device_context.state.job_state_map
84- with tarfile.TarFile.open(None, 'w|xz', stream) as tar:
85- tar.addfile(html_tarinfo, html_stream)
86- tar.addfile(json_tarinfo, json_stream)
87- tar.addfile(xlsx_tarinfo, xlsx_stream)
88+ with tarfile.TarFile.open(None, 'w:xz', stream, preset=preset) as tar:
89+ for fmt in ('html', 'json', 'xlsx'):
90+ if fmt == 'xlsx':
91+ options_list = [
92+ XLSXSessionStateExporter.OPTION_WITH_SYSTEM_INFO,
93+ XLSXSessionStateExporter.OPTION_WITH_SUMMARY,
94+ XLSXSessionStateExporter.OPTION_WITH_DESCRIPTION,
95+ XLSXSessionStateExporter.OPTION_WITH_TEXT_ATTACHMENTS,
96+ ]
97+ exporter = XLSXSessionStateExporter(options_list)
98+ else:
99+ unit = self._get_all_exporter_units()[
100+ 'com.canonical.plainbox::{}'.format(fmt)]
101+ exporter = Jinja2SessionStateExporter(exporter_unit=unit)
102+ with SpooledTemporaryFile(max_size=102400, mode='w+b') as _s:
103+ exporter.dump_from_session_manager(manager, _s)
104+ tarinfo = tarfile.TarInfo(name="submission.{}".format(fmt))
105+ tarinfo.size = _s.tell()
106+ tarinfo.mtime = time.time()
107+ _s.seek(0) # Need to rewind the file, puagh
108+ tar.addfile(tarinfo, _s)
109 for job_id in manager.default_device_context.state.job_state_map:
110 job_state = job_state_map[job_id]
111 try:
112diff --git a/plainbox/impl/exporter/xlsx.py b/plainbox/impl/exporter/xlsx.py
113index e51b193..d10bbe4 100644
114--- a/plainbox/impl/exporter/xlsx.py
115+++ b/plainbox/impl/exporter/xlsx.py
116@@ -688,7 +688,7 @@ class XLSXSessionStateExporter(SessionStateExporterBase):
117 """
118 Public method to dump the XLSX report to a stream
119 """
120- self.workbook = Workbook(stream)
121+ self.workbook = Workbook(stream, {'constant_memory': True})
122 self._set_formats()
123 if self.OPTION_WITH_SYSTEM_INFO in self._option_list:
124 self.worksheet1 = self.workbook.add_worksheet(_('System Info'))
125diff --git a/plainbox/impl/session/assistant.py b/plainbox/impl/session/assistant.py
126index db60157..0a758f0 100644
127--- a/plainbox/impl/session/assistant.py
128+++ b/plainbox/impl/session/assistant.py
129@@ -27,12 +27,12 @@ Session Assistant.
130 import collections
131 import datetime
132 import fnmatch
133-import io
134 import itertools
135 import logging
136 import os
137 import shlex
138 import time
139+from tempfile import SpooledTemporaryFile
140
141 from plainbox.abc import IJobResult
142 from plainbox.abc import IJobRunnerUI
143@@ -1514,7 +1514,7 @@ class SessionAssistant:
144 """
145 UsageExpectation.of(self).enforce()
146 exporter = self._manager.create_exporter(exporter_id, options)
147- exported_stream = io.BytesIO()
148+ exported_stream = SpooledTemporaryFile(max_size=102400, mode='w+b')
149 exporter.dump_from_session_manager(self._manager, exported_stream)
150 exported_stream.seek(0)
151 result = transport.send(exported_stream)

Subscribers

People subscribed via source and target branches