Merge lp:~cjwatson/launchpad/limit-debdiff into lp:launchpad

Proposed by Colin Watson
Status: Merged
Merged at revision: 17862
Proposed branch: lp:~cjwatson/launchpad/limit-debdiff
Merge into: lp:launchpad
Diff against target: 288 lines (+118/-45)
5 files modified
lib/lp/services/config/schema-lazr.conf (+16/-0)
lib/lp/soyuz/doc/package-diff.txt (+0/-27)
lib/lp/soyuz/model/packagediff.py (+31/-2)
lib/lp/soyuz/model/sourcepackagerelease.py (+2/-12)
lib/lp/soyuz/tests/test_packagediff.py (+69/-4)
To merge this branch: bzr merge lp:~cjwatson/launchpad/limit-debdiff
Reviewer | Review Type | Date Requested | Status
William Grant | code | | Approve
Review via email: mp+278187@code.launchpad.net

Commit message

Kill debdiff after ten minutes or 10GiB of output by default, and make sure we clean up after it properly. Add a configurable blacklist.

Description of the change

Kill debdiff after ten minutes or 10GiB of output by default, and make sure we clean up after it properly. Add a configurable blacklist.

Some source packages that contain particularly convoluted symlink farms can confuse debdiff into producing exponentially large output, and we should guard ourselves against this possibility. Ten minutes seems to be a reasonable threshold, as it's larger than the time taken for 99.9% of all successful PackageDiffJobs in 2015 to complete, but I've made it configurable in case we need to tweak it in future. I've arranged to set TMPDIR because debdiff creates some of its own temporary files there and may not clean them up properly if it's killed.

To post a comment you must log in.
Revision history for this message
William Grant (wgrant) :
review: Approve (code)
Revision history for this message
Robert Collins (lifeless) wrote :

It might be a good idea to set a direct metric on the output work too - e.g. set a ulimit on memory or total CPU, or stop immediately when more than some N (e.g. 1M) of output is generated.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/lp/services/config/schema-lazr.conf'
2--- lib/lp/services/config/schema-lazr.conf 2015-11-17 01:39:17 +0000
3+++ lib/lp/services/config/schema-lazr.conf 2015-12-02 11:27:35 +0000
4@@ -1387,6 +1387,22 @@
5 port: 11217
6
7
8+[packagediff]
9+# The timeout in seconds for a debdiff between two packages.
10+# datatype: integer
11+debdiff_timeout: 600
12+
13+# The maximum size in bytes of any file created while debdiffing two
14+# packages.
15+# 10GiB
16+# datatype: integer
17+debdiff_max_size: 10737418240
18+
19+# Packages we never try to generate diffs for.
20+# datatype: string
21+blacklist:
22+
23+
24 [person_notification]
25 # User for person notification db access
26 # datatype: string
27
28=== modified file 'lib/lp/soyuz/doc/package-diff.txt'
29--- lib/lp/soyuz/doc/package-diff.txt 2013-08-06 09:49:14 +0000
30+++ lib/lp/soyuz/doc/package-diff.txt 2015-12-02 11:27:35 +0000
31@@ -649,33 +649,6 @@
32 None
33
34
35-Problematic packages
36---------------------
37-
38-XXX 2009-11-23 Julian bug=314436
39-Because of bug 314436, diffs of udev can generate huge output which fills the
40-disk very quickly. For that reason, diffs of udev are created FAILED by
41-default, which will stop the diff script from attempting to create the diff.
42-
43- >>> from lp.soyuz.enums import PackagePublishingStatus
44- >>> from lp.soyuz.tests.test_publishing import SoyuzTestPublisher
45- >>> stp = SoyuzTestPublisher()
46- >>> discard = stp.setUpDefaultDistroSeries(hoary)
47- >>> udev_orig = stp.getPubSource(
48- ... sourcename="udev", version="1.0",
49- ... status=PackagePublishingStatus.PUBLISHED)
50- >>> udev_new = stp.getPubSource(
51- ... sourcename="udev", version="1.1",
52- ... status=PackagePublishingStatus.PENDING)
53-
54- >>> udev_diff = udev_orig.sourcepackagerelease.requestDiffTo(
55- ... requester=cprov,
56- ... to_sourcepackagerelease=udev_new.sourcepackagerelease)
57-
58- >>> print udev_diff.status.name
59- FAILED
60-
61-
62 PackageDiff privacy
63 -------------------
64
65
66=== modified file 'lib/lp/soyuz/model/packagediff.py'
67--- lib/lp/soyuz/model/packagediff.py 2015-07-08 16:05:11 +0000
68+++ lib/lp/soyuz/model/packagediff.py 2015-12-02 11:27:35 +0000
69@@ -1,4 +1,4 @@
70-# Copyright 2009-2012 Canonical Ltd. This software is licensed under the
71+# Copyright 2009-2015 Canonical Ltd. This software is licensed under the
72 # GNU Affero General Public License version 3 (see the file LICENSE).
73
74 __metaclass__ = type
75@@ -7,10 +7,13 @@
76 'PackageDiffSet',
77 ]
78
79+from functools import partial
80 import gzip
81 import itertools
82 import os
83+import resource
84 import shutil
85+import signal
86 import subprocess
87 import tempfile
88
89@@ -20,6 +23,7 @@
90 from zope.component import getUtility
91 from zope.interface import implementer
92
93+from lp.services.config import config
94 from lp.services.database.bulk import load
95 from lp.services.database.constants import UTC_NOW
96 from lp.services.database.datetimecol import UtcDateTimeCol
97@@ -43,6 +47,19 @@
98 )
99
100
101+def limit_deb_diff(timeout, max_size):
102+ """Pre-exec function to apply resource limits to debdiff.
103+
104+ :param timeout: Time limit in seconds.
105+ :param max_size: Maximum output file size in bytes.
106+ """
107+ signal.alarm(timeout)
108+ _, hard_fsize = resource.getrlimit(resource.RLIMIT_FSIZE)
109+ if hard_fsize != resource.RLIM_INFINITY and hard_fsize < max_size:
110+ max_size = hard_fsize
111+ resource.setrlimit(resource.RLIMIT_FSIZE, (max_size, hard_fsize))
112+
113+
114 def perform_deb_diff(tmp_dir, out_filename, from_files, to_files):
115 """Perform a (deb)diff on two packages.
116
117@@ -67,13 +84,20 @@
118 [to_dsc] = [name for name in to_files
119 if name.lower().endswith('.dsc')]
120 args = ['debdiff', from_dsc, to_dsc]
121+ env = os.environ.copy()
122+ env['TMPDIR'] = tmp_dir
123
124 full_path = os.path.join(tmp_dir, out_filename)
125 out_file = None
126 try:
127 out_file = open(full_path, 'w')
128 process = subprocess.Popen(
129- args, stdout=out_file, stderr=subprocess.PIPE, cwd=tmp_dir)
130+ args, stdout=out_file, stderr=subprocess.PIPE,
131+ preexec_fn=partial(
132+ limit_deb_diff,
133+ config.packagediff.debdiff_timeout,
134+ config.packagediff.debdiff_max_size),
135+ cwd=tmp_dir, env=env)
136 stdout, stderr = process.communicate()
137 finally:
138 if out_file is not None:
139@@ -173,6 +197,11 @@
140 self.status = PackageDiffStatus.FAILED
141 return
142
143+ blacklist = config.packagediff.blacklist.split()
144+ if self.from_source.sourcepackagename.name in blacklist:
145+ self.status = PackageDiffStatus.FAILED
146+ return
147+
148 # Create the temporary directory where the files will be
149 # downloaded to and where the debdiff will be performed.
150 tmp_dir = tempfile.mkdtemp()
151
152=== modified file 'lib/lp/soyuz/model/sourcepackagerelease.py'
153--- lib/lp/soyuz/model/sourcepackagerelease.py 2015-07-08 16:05:11 +0000
154+++ lib/lp/soyuz/model/sourcepackagerelease.py 2015-12-02 11:27:35 +0000
155@@ -60,7 +60,6 @@
156 cachedproperty,
157 get_property_cache,
158 )
159-from lp.soyuz.enums import PackageDiffStatus
160 from lp.soyuz.interfaces.archive import MAIN_ARCHIVE_PURPOSES
161 from lp.soyuz.interfaces.packagediff import PackageDiffAlreadyRequested
162 from lp.soyuz.interfaces.packagediffjob import IPackageDiffJobSource
163@@ -395,21 +394,12 @@
164 raise PackageDiffAlreadyRequested(
165 "%s has already been requested" % candidate.title)
166
167- if self.sourcepackagename.name == 'udev':
168- # XXX 2009-11-23 Julian bug=314436
169- # Currently diff output for udev will fill disks. It's
170- # disabled until diffutils is fixed in that bug.
171- status = PackageDiffStatus.FAILED
172- else:
173- status = PackageDiffStatus.PENDING
174-
175 Store.of(to_sourcepackagerelease).flush()
176 del get_property_cache(to_sourcepackagerelease).package_diffs
177 packagediff = PackageDiff(
178 from_source=self, to_source=to_sourcepackagerelease,
179- requester=requester, status=status)
180- if status == PackageDiffStatus.PENDING:
181- getUtility(IPackageDiffJobSource).create(packagediff)
182+ requester=requester)
183+ getUtility(IPackageDiffJobSource).create(packagediff)
184 return packagediff
185
186 def aggregate_changelog(self, since_version):
187
188=== modified file 'lib/lp/soyuz/tests/test_packagediff.py'
189--- lib/lp/soyuz/tests/test_packagediff.py 2013-07-31 00:37:32 +0000
190+++ lib/lp/soyuz/tests/test_packagediff.py 2015-12-02 11:27:35 +0000
191@@ -1,13 +1,18 @@
192-# Copyright 2010-2013 Canonical Ltd. This software is licensed under the
193+# Copyright 2010-2015 Canonical Ltd. This software is licensed under the
194 # GNU Affero General Public License version 3 (see the file LICENSE).
195
196 """Test source package diffs."""
197
198+from __future__ import print_function
199+
200 __metaclass__ = type
201
202 from datetime import datetime
203+import errno
204 import os.path
205+from textwrap import dedent
206
207+from fixtures import EnvironmentVariableFixture
208 import transaction
209 from zope.security.proxy import removeSecurityProxy
210
211@@ -23,10 +28,12 @@
212 from lp.testing.layers import LaunchpadZopelessLayer
213
214
215-def create_proper_job(factory):
216+def create_proper_job(factory, sourcepackagename=None):
217 archive = factory.makeArchive()
218- foo_dash1 = factory.makeSourcePackageRelease(archive=archive)
219- foo_dash15 = factory.makeSourcePackageRelease(archive=archive)
220+ foo_dash1 = factory.makeSourcePackageRelease(
221+ archive=archive, sourcepackagename=sourcepackagename)
222+ foo_dash15 = factory.makeSourcePackageRelease(
223+ archive=archive, sourcepackagename=sourcepackagename)
224 suite_dir = 'lib/lp/archiveuploader/tests/data/suite'
225 files = {
226 '%s/foo_1.0-1/foo_1.0-1.diff.gz' % suite_dir: None,
227@@ -156,3 +163,61 @@
228 [job] = IStore(Job).find(
229 Job, Job.base_job_type == JobType.GENERATE_PACKAGE_DIFF)
230 self.assertIsNot(None, job)
231+
232+ def test_packagediff_timeout(self):
233+ # debdiff is killed after the time limit expires.
234+ self.pushConfig("packagediff", debdiff_timeout=1)
235+ temp_dir = self.makeTemporaryDirectory()
236+ mock_debdiff_path = os.path.join(temp_dir, "debdiff")
237+ marker_path = os.path.join(temp_dir, "marker")
238+ with open(mock_debdiff_path, "w") as mock_debdiff:
239+ print(dedent("""\
240+ #! /bin/sh
241+ (echo "$$"; echo "$TMPDIR") >%s
242+ sleep 5
243+ """ % marker_path), end="", file=mock_debdiff)
244+ os.chmod(mock_debdiff_path, 0o755)
245+ mock_path = "%s:%s" % (temp_dir, os.environ["PATH"])
246+ diff = create_proper_job(self.factory)
247+ with EnvironmentVariableFixture("PATH", mock_path):
248+ diff.performDiff()
249+ self.assertEqual(PackageDiffStatus.FAILED, diff.status)
250+ with open(marker_path) as marker:
251+ debdiff_pid = int(marker.readline())
252+ debdiff_tmpdir = marker.readline().rstrip("\n")
253+ err = self.assertRaises(OSError, os.kill, debdiff_pid, 0)
254+ self.assertEqual(errno.ESRCH, err.errno)
255+ self.assertFalse(os.path.exists(debdiff_tmpdir))
256+
257+ def test_packagediff_max_size(self):
258+ # debdiff is killed if it generates more than the size limit.
259+ self.pushConfig("packagediff", debdiff_max_size=1024)
260+ temp_dir = self.makeTemporaryDirectory()
261+ mock_debdiff_path = os.path.join(temp_dir, "debdiff")
262+ marker_path = os.path.join(temp_dir, "marker")
263+ with open(mock_debdiff_path, "w") as mock_debdiff:
264+ print(dedent("""\
265+ #! /bin/sh
266+ (echo "$$"; echo "$TMPDIR") >%s
267+ yes | head -n2048 || exit 2
268+ sleep 5
269+ """ % marker_path), end="", file=mock_debdiff)
270+ os.chmod(mock_debdiff_path, 0o755)
271+ mock_path = "%s:%s" % (temp_dir, os.environ["PATH"])
272+ diff = create_proper_job(self.factory)
273+ with EnvironmentVariableFixture("PATH", mock_path):
274+ diff.performDiff()
275+ self.assertEqual(PackageDiffStatus.FAILED, diff.status)
276+ with open(marker_path) as marker:
277+ debdiff_pid = int(marker.readline())
278+ debdiff_tmpdir = marker.readline().rstrip("\n")
279+ err = self.assertRaises(OSError, os.kill, debdiff_pid, 0)
280+ self.assertEqual(errno.ESRCH, err.errno)
281+ self.assertFalse(os.path.exists(debdiff_tmpdir))
282+
283+ def test_packagediff_blacklist(self):
284+ # Package diff jobs for blacklisted package names do nothing.
285+ self.pushConfig("packagediff", blacklist="udev cordova-cli")
286+ diff = create_proper_job(self.factory, sourcepackagename="cordova-cli")
287+ diff.performDiff()
288+ self.assertEqual(PackageDiffStatus.FAILED, diff.status)