Merge lp:~cjwatson/launchpad-buildd/large-build-artifacts into lp:launchpad-buildd

Proposed by Colin Watson
Status: Merged
Approved by: Colin Watson
Approved revision: 80
Merged at revision: 78
Proposed branch: lp:~cjwatson/launchpad-buildd/large-build-artifacts
Merge into: lp:launchpad-buildd
Diff against target: 198 lines (+71/-34)
5 files modified
debian/changelog (+4/-0)
lpbuildd/binarypackage.py (+12/-7)
lpbuildd/debian.py (+37/-13)
lpbuildd/slave.py (+17/-12)
lpbuildd/sourcepackagerecipe.py (+1/-2)
To merge this branch: bzr merge lp:~cjwatson/launchpad-buildd/large-build-artifacts
Reviewer Review Type Date Requested Status
William Grant code Approve
Review via email: mp+186503@code.launchpad.net

Commit message

Avoid trying to read entire log or result files into memory in one go.

Description of the change

This branch rearranges the buildd slave to avoid trying to read entire log or result files into memory in one go.

For result files, this is fairly trivial. For log files, we have to search them so it's not so trivial. However, matches will not span the whole file, so we can do this using a sliding window.

This has efficiency problems of its own. I think they'll generally be acceptable for the moment; for instance I can use this code to search a 2GB file for three regexes in 31 seconds on my laptop, and grep takes 11 seconds for one regex; also, we used to read the entire file in some cases even if we weren't going to search it (e.g. successful builds), so this should improve slave throughput regardless.

To post a comment you must log in.
Revision history for this message
William Grant (wgrant) wrote :

I'd consider doing storeFile in one pass by writing to a temporary file first. We made a similar change in the other cache code to avoid leaving truncated files on failure, so it'd be a double win. But it's less important here.

review: Approve (code)
79. By Colin Watson

merge trunk

80. By Colin Watson

Convert storeFile back to a single pass.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'debian/changelog'
2--- debian/changelog 2013-09-24 08:54:16 +0000
3+++ debian/changelog 2013-09-24 09:41:17 +0000
4@@ -5,6 +5,10 @@
5 * Remove obsolete BuildDSlave.fetchFile method, unused since October 2005.
6 * If the expected .changes file doesn't exist, consider this as a package
7 build failure rather than crashing (LP: #993642).
8+ * Don't attempt to read entire files into memory at once when storing them
9+ in the file cache.
10+ * Rearrange build log searching to avoid reading the entire build log into
11+ memory at once (LP: #1227086).
12
13 [ Adam Conrad ]
14 * Tidy up log formatting of the "Already reaped..." message.
15
16=== modified file 'lpbuildd/binarypackage.py'
17--- lpbuildd/binarypackage.py 2013-09-09 14:18:35 +0000
18+++ lpbuildd/binarypackage.py 2013-09-24 09:41:17 +0000
19@@ -88,25 +88,30 @@
20
21 def iterate_SBUILD(self, success):
22 """Finished the sbuild run."""
23- tmpLog = self.getTmpLogContents()
24 if success != SBuildExitCodes.OK:
25+ log_patterns = []
26+
27 if (success == SBuildExitCodes.DEPFAIL or
28 success == SBuildExitCodes.PACKAGEFAIL):
29 for rx in BuildLogRegexes.GIVENBACK:
30- mo = re.search(rx, tmpLog, re.M)
31- if mo:
32- success = SBuildExitCodes.GIVENBACK
33+ log_patterns.append([rx, re.M])
34
35 if success == SBuildExitCodes.DEPFAIL:
36 for rx, dep in BuildLogRegexes.DEPFAIL:
37- mo = re.search(rx, tmpLog, re.M)
38- if mo:
39+ log_patterns.append([rx, re.M])
40+
41+ if log_patterns:
42+ rx, mo = self.searchLogContents(log_patterns)
43+ if mo:
44+ if rx in BuildLogRegexes.GIVENBACK:
45+ success = SBuildExitCodes.GIVENBACK
46+ elif rx in BuildLogRegexes.DEPFAIL:
47 if not self.alreadyfailed:
48+ dep = BuildLogRegexes.DEPFAIL[rx]
49 print("Returning build status: DEPFAIL")
50 print("Dependencies: " + mo.expand(dep))
51 self._slave.depFail(mo.expand(dep))
52 success = SBuildExitCodes.DEPFAIL
53- break
54 else:
55 success = SBuildExitCodes.PACKAGEFAIL
56
57
58=== modified file 'lpbuildd/debian.py'
59--- lpbuildd/debian.py 2013-08-12 04:29:33 +0000
60+++ lpbuildd/debian.py 2013-09-24 09:41:17 +0000
61@@ -9,6 +9,7 @@
62 __metaclass__ = type
63
64 import os
65+import re
66
67 from lpbuildd.slave import (
68 BuildManager,
69@@ -100,16 +101,15 @@
70 The primary file we care about is the .changes file. We key from there.
71 """
72 path = self.getChangesFilename()
73- name = os.path.basename(path)
74+ self._slave.addWaitingFile(path)
75+
76 chfile = open(path, "r")
77- self._slave.waitingfiles[name] = self._slave.storeFile(chfile.read())
78- chfile.seek(0)
79-
80- for fn in self._parseChangesFile(chfile):
81- self._slave.addWaitingFile(
82- get_build_path(self.home, self._buildid, fn))
83-
84- chfile.close()
85+ try:
86+ for fn in self._parseChangesFile(chfile):
87+ self._slave.addWaitingFile(
88+ get_build_path(self.home, self._buildid, fn))
89+ finally:
90+ chfile.close()
91
92 def iterate(self, success):
93 # When a Twisted ProcessControl class is killed by SIGTERM,
94@@ -173,12 +173,36 @@
95 self._state = DebianBuildState.UPDATE
96 self.doUpdateChroot()
97
98- def getTmpLogContents(self):
99+ def searchLogContents(self, patterns_and_flags):
100+ """Search for any of a list of regex patterns in the build log.
101+
102+ The build log is matched using a sliding window, which avoids having
103+ to read the whole file into memory at once but requires that matches
104+ be no longer than the chunk size (currently 256KiB).
105+
106+ :return: A tuple of the regex pattern that matched and the match
107+ object, or (None, None).
108+ """
109+ chunk_size = 256 * 1024
110+ regexes = [
111+ re.compile(pattern, flags)
112+ for pattern, flags in patterns_and_flags]
113+ log = open(os.path.join(self._cachepath, "buildlog"))
114 try:
115- tmpLogHandle = open(os.path.join(self._cachepath, "buildlog"))
116- return tmpLogHandle.read()
117+ window = ""
118+ chunk = log.read(chunk_size)
119+ while chunk:
120+ window += chunk
121+ for regex in regexes:
122+ match = regex.search(window)
123+ if match is not None:
124+ return regex.pattern, match
125+ if len(window) > chunk_size:
126+ window = window[chunk_size:]
127+ chunk = log.read(chunk_size)
128 finally:
129- tmpLogHandle.close()
130+ log.close()
131+ return None, None
132
133 def iterate_SOURCES(self, success):
134 """Just finished overwriting sources.list."""
135
136=== modified file 'lpbuildd/slave.py'
137--- lpbuildd/slave.py 2013-09-18 12:20:31 +0000
138+++ lpbuildd/slave.py 2013-09-24 09:41:17 +0000
139@@ -376,25 +376,30 @@
140 self.log(extra_info)
141 return (os.path.exists(cachefile), extra_info)
142
143- def storeFile(self, content):
144- """Take the provided content and store it in the file cache."""
145- sha1sum = hashlib.sha1(content).hexdigest()
146+ def storeFile(self, path):
147+ """Store the content of the provided path in the file cache."""
148+ f = open(path)
149+ tmppath = self.cachePath("storeFile.tmp")
150+ of = open(tmppath, "w")
151+ try:
152+ sha1 = hashlib.sha1()
153+ for chunk in iter(lambda: f.read(256*1024), ''):
154+ sha1.update(chunk)
155+ of.write(chunk)
156+ sha1sum = sha1.hexdigest()
157+ finally:
158+ of.close()
159+ f.close()
160 present, info = self.ensurePresent(sha1sum)
161 if present:
162+ os.unlink(tmppath)
163 return sha1sum
164- f = open(self.cachePath(sha1sum), "w")
165- f.write(content)
166- f.close()
167+ os.rename(tmppath, self.cachePath(sha1sum))
168 return sha1sum
169
170 def addWaitingFile(self, path):
171 """Add a file to the cache and store its details for reporting."""
172- fn = os.path.basename(path)
173- f = open(path)
174- try:
175- self.waitingfiles[fn] = self.storeFile(f.read())
176- finally:
177- f.close()
178+ self.waitingfiles[os.path.basename(path)] = self.storeFile(path)
179
180 def abort(self):
181 """Abort the current build."""
182
183=== modified file 'lpbuildd/sourcepackagerecipe.py'
184--- lpbuildd/sourcepackagerecipe.py 2013-08-28 22:41:24 +0000
185+++ lpbuildd/sourcepackagerecipe.py 2013-09-24 09:41:17 +0000
186@@ -99,11 +99,10 @@
187 print("Returning build status: OK")
188 elif retcode == RETCODE_FAILURE_INSTALL_BUILD_DEPS:
189 if not self.alreadyfailed:
190- tmpLog = self.getTmpLogContents()
191 rx = (
192 'The following packages have unmet dependencies:\n'
193 '.*: Depends: ([^ ]*( \([^)]*\))?)')
194- mo = re.search(rx, tmpLog, re.M)
195+ _, mo = self.searchLogContents([rx, re.M])
196 if mo:
197 self._slave.depFail(mo.group(1))
198 print("Returning build status: DEPFAIL")

Subscribers

People subscribed via source and target branches

to all changes: