Merge lp:~cjwatson/launchpad/limit-debdiff into lp:launchpad

Proposed by Colin Watson
Status: Merged
Merged at revision: 17862
Proposed branch: lp:~cjwatson/launchpad/limit-debdiff
Merge into: lp:launchpad
Diff against target: 288 lines (+118/-45)
5 files modified
lib/lp/services/config/schema-lazr.conf (+16/-0)
lib/lp/soyuz/doc/package-diff.txt (+0/-27)
lib/lp/soyuz/model/packagediff.py (+31/-2)
lib/lp/soyuz/model/sourcepackagerelease.py (+2/-12)
lib/lp/soyuz/tests/test_packagediff.py (+69/-4)
To merge this branch: bzr merge lp:~cjwatson/launchpad/limit-debdiff
Reviewer Review Type Date Requested Status
William Grant code Approve
Review via email: mp+278187@code.launchpad.net

Commit message

Kill debdiff after ten minutes or 1GiB of output by default, and make sure we clean up after it properly. Add a configurable blacklist.

Description of the change

Kill debdiff after ten minutes or 1GiB of output by default, and make sure we clean up after it properly. Add a configurable blacklist.

Some source packages that contain particularly convoluted symlink farms can confuse debdiff into producing exponentially large output, and we should guard ourselves against this possibility. Ten minutes seems to be a reasonable threshold, as it's larger than the time taken for 99.9% of all successful PackageDiffJobs in 2015 to complete, but I've made it configurable in case we need to tweak it in future. I've arranged to set TMPDIR because debdiff creates some of its own temporary files there and may not clean them up properly if it's killed.

To post a comment you must log in.
Revision history for this message
William Grant (wgrant) :
review: Approve (code)
Revision history for this message
Robert Collins (lifeless) wrote :

It might be a good idea to set a direct metric on the output work too - e.g. set a ulimit on memory, total cpu or when more than some N (e.g. 1M) of output is generated stop immediately.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'lib/lp/services/config/schema-lazr.conf'
--- lib/lp/services/config/schema-lazr.conf 2015-11-17 01:39:17 +0000
+++ lib/lp/services/config/schema-lazr.conf 2015-12-02 11:27:35 +0000
@@ -1387,6 +1387,22 @@
1387port: 112171387port: 11217
13881388
13891389
1390[packagediff]
1391# The timeout in seconds for a debdiff between two packages.
1392# datatype: integer
1393debdiff_timeout: 600
1394
1395# The maximum size in bytes of any file created while debdiffing two
1396# packages.
1397# 10GiB
1398# datatype: integer
1399debdiff_max_size: 10737418240
1400
1401# Packages we never try to generate diffs for.
1402# datatype: string
1403blacklist:
1404
1405
1390[person_notification]1406[person_notification]
1391# User for person notification db access1407# User for person notification db access
1392# datatype: string1408# datatype: string
13931409
=== modified file 'lib/lp/soyuz/doc/package-diff.txt'
--- lib/lp/soyuz/doc/package-diff.txt 2013-08-06 09:49:14 +0000
+++ lib/lp/soyuz/doc/package-diff.txt 2015-12-02 11:27:35 +0000
@@ -649,33 +649,6 @@
649 None649 None
650650
651651
652Problematic packages
653--------------------
654
655XXX 2009-11-23 Julian bug=314436
656Because of bug 314436, diffs of udev can generate huge output which fills the
657disk very quickly. For that reason, diffs of udev are created FAILED by
658default, which will stop the diff script from attempting to create the diff.
659
660 >>> from lp.soyuz.enums import PackagePublishingStatus
661 >>> from lp.soyuz.tests.test_publishing import SoyuzTestPublisher
662 >>> stp = SoyuzTestPublisher()
663 >>> discard = stp.setUpDefaultDistroSeries(hoary)
664 >>> udev_orig = stp.getPubSource(
665 ... sourcename="udev", version="1.0",
666 ... status=PackagePublishingStatus.PUBLISHED)
667 >>> udev_new = stp.getPubSource(
668 ... sourcename="udev", version="1.1",
669 ... status=PackagePublishingStatus.PENDING)
670
671 >>> udev_diff = udev_orig.sourcepackagerelease.requestDiffTo(
672 ... requester=cprov,
673 ... to_sourcepackagerelease=udev_new.sourcepackagerelease)
674
675 >>> print udev_diff.status.name
676 FAILED
677
678
679PackageDiff privacy652PackageDiff privacy
680-------------------653-------------------
681654
682655
=== modified file 'lib/lp/soyuz/model/packagediff.py'
--- lib/lp/soyuz/model/packagediff.py 2015-07-08 16:05:11 +0000
+++ lib/lp/soyuz/model/packagediff.py 2015-12-02 11:27:35 +0000
@@ -1,4 +1,4 @@
1# Copyright 2009-2012 Canonical Ltd. This software is licensed under the1# Copyright 2009-2015 Canonical Ltd. This software is licensed under the
2# GNU Affero General Public License version 3 (see the file LICENSE).2# GNU Affero General Public License version 3 (see the file LICENSE).
33
4__metaclass__ = type4__metaclass__ = type
@@ -7,10 +7,13 @@
7 'PackageDiffSet',7 'PackageDiffSet',
8 ]8 ]
99
10from functools import partial
10import gzip11import gzip
11import itertools12import itertools
12import os13import os
14import resource
13import shutil15import shutil
16import signal
14import subprocess17import subprocess
15import tempfile18import tempfile
1619
@@ -20,6 +23,7 @@
20from zope.component import getUtility23from zope.component import getUtility
21from zope.interface import implementer24from zope.interface import implementer
2225
26from lp.services.config import config
23from lp.services.database.bulk import load27from lp.services.database.bulk import load
24from lp.services.database.constants import UTC_NOW28from lp.services.database.constants import UTC_NOW
25from lp.services.database.datetimecol import UtcDateTimeCol29from lp.services.database.datetimecol import UtcDateTimeCol
@@ -43,6 +47,19 @@
43 )47 )
4448
4549
50def limit_deb_diff(timeout, max_size):
51 """Pre-exec function to apply resource limits to debdiff.
52
53 :param timeout: Time limit in seconds.
54 :param max_size: Maximum output file size in bytes.
55 """
56 signal.alarm(timeout)
57 _, hard_fsize = resource.getrlimit(resource.RLIMIT_FSIZE)
58 if hard_fsize != resource.RLIM_INFINITY and hard_fsize < max_size:
59 max_size = hard_fsize
60 resource.setrlimit(resource.RLIMIT_FSIZE, (max_size, hard_fsize))
61
62
46def perform_deb_diff(tmp_dir, out_filename, from_files, to_files):63def perform_deb_diff(tmp_dir, out_filename, from_files, to_files):
47 """Perform a (deb)diff on two packages.64 """Perform a (deb)diff on two packages.
4865
@@ -67,13 +84,20 @@
67 [to_dsc] = [name for name in to_files84 [to_dsc] = [name for name in to_files
68 if name.lower().endswith('.dsc')]85 if name.lower().endswith('.dsc')]
69 args = ['debdiff', from_dsc, to_dsc]86 args = ['debdiff', from_dsc, to_dsc]
87 env = os.environ.copy()
88 env['TMPDIR'] = tmp_dir
7089
71 full_path = os.path.join(tmp_dir, out_filename)90 full_path = os.path.join(tmp_dir, out_filename)
72 out_file = None91 out_file = None
73 try:92 try:
74 out_file = open(full_path, 'w')93 out_file = open(full_path, 'w')
75 process = subprocess.Popen(94 process = subprocess.Popen(
76 args, stdout=out_file, stderr=subprocess.PIPE, cwd=tmp_dir)95 args, stdout=out_file, stderr=subprocess.PIPE,
96 preexec_fn=partial(
97 limit_deb_diff,
98 config.packagediff.debdiff_timeout,
99 config.packagediff.debdiff_max_size),
100 cwd=tmp_dir, env=env)
77 stdout, stderr = process.communicate()101 stdout, stderr = process.communicate()
78 finally:102 finally:
79 if out_file is not None:103 if out_file is not None:
@@ -173,6 +197,11 @@
173 self.status = PackageDiffStatus.FAILED197 self.status = PackageDiffStatus.FAILED
174 return198 return
175199
200 blacklist = config.packagediff.blacklist.split()
201 if self.from_source.sourcepackagename.name in blacklist:
202 self.status = PackageDiffStatus.FAILED
203 return
204
176 # Create the temporary directory where the files will be205 # Create the temporary directory where the files will be
177 # downloaded to and where the debdiff will be performed.206 # downloaded to and where the debdiff will be performed.
178 tmp_dir = tempfile.mkdtemp()207 tmp_dir = tempfile.mkdtemp()
179208
=== modified file 'lib/lp/soyuz/model/sourcepackagerelease.py'
--- lib/lp/soyuz/model/sourcepackagerelease.py 2015-07-08 16:05:11 +0000
+++ lib/lp/soyuz/model/sourcepackagerelease.py 2015-12-02 11:27:35 +0000
@@ -60,7 +60,6 @@
60 cachedproperty,60 cachedproperty,
61 get_property_cache,61 get_property_cache,
62 )62 )
63from lp.soyuz.enums import PackageDiffStatus
64from lp.soyuz.interfaces.archive import MAIN_ARCHIVE_PURPOSES63from lp.soyuz.interfaces.archive import MAIN_ARCHIVE_PURPOSES
65from lp.soyuz.interfaces.packagediff import PackageDiffAlreadyRequested64from lp.soyuz.interfaces.packagediff import PackageDiffAlreadyRequested
66from lp.soyuz.interfaces.packagediffjob import IPackageDiffJobSource65from lp.soyuz.interfaces.packagediffjob import IPackageDiffJobSource
@@ -395,21 +394,12 @@
395 raise PackageDiffAlreadyRequested(394 raise PackageDiffAlreadyRequested(
396 "%s has already been requested" % candidate.title)395 "%s has already been requested" % candidate.title)
397396
398 if self.sourcepackagename.name == 'udev':
399 # XXX 2009-11-23 Julian bug=314436
400 # Currently diff output for udev will fill disks. It's
401 # disabled until diffutils is fixed in that bug.
402 status = PackageDiffStatus.FAILED
403 else:
404 status = PackageDiffStatus.PENDING
405
406 Store.of(to_sourcepackagerelease).flush()397 Store.of(to_sourcepackagerelease).flush()
407 del get_property_cache(to_sourcepackagerelease).package_diffs398 del get_property_cache(to_sourcepackagerelease).package_diffs
408 packagediff = PackageDiff(399 packagediff = PackageDiff(
409 from_source=self, to_source=to_sourcepackagerelease,400 from_source=self, to_source=to_sourcepackagerelease,
410 requester=requester, status=status)401 requester=requester)
411 if status == PackageDiffStatus.PENDING:402 getUtility(IPackageDiffJobSource).create(packagediff)
412 getUtility(IPackageDiffJobSource).create(packagediff)
413 return packagediff403 return packagediff
414404
415 def aggregate_changelog(self, since_version):405 def aggregate_changelog(self, since_version):
416406
=== modified file 'lib/lp/soyuz/tests/test_packagediff.py'
--- lib/lp/soyuz/tests/test_packagediff.py 2013-07-31 00:37:32 +0000
+++ lib/lp/soyuz/tests/test_packagediff.py 2015-12-02 11:27:35 +0000
@@ -1,13 +1,18 @@
1# Copyright 2010-2013 Canonical Ltd. This software is licensed under the1# Copyright 2010-2015 Canonical Ltd. This software is licensed under the
2# GNU Affero General Public License version 3 (see the file LICENSE).2# GNU Affero General Public License version 3 (see the file LICENSE).
33
4"""Test source package diffs."""4"""Test source package diffs."""
55
6from __future__ import print_function
7
6__metaclass__ = type8__metaclass__ = type
79
8from datetime import datetime10from datetime import datetime
11import errno
9import os.path12import os.path
13from textwrap import dedent
1014
15from fixtures import EnvironmentVariableFixture
11import transaction16import transaction
12from zope.security.proxy import removeSecurityProxy17from zope.security.proxy import removeSecurityProxy
1318
@@ -23,10 +28,12 @@
23from lp.testing.layers import LaunchpadZopelessLayer28from lp.testing.layers import LaunchpadZopelessLayer
2429
2530
26def create_proper_job(factory):31def create_proper_job(factory, sourcepackagename=None):
27 archive = factory.makeArchive()32 archive = factory.makeArchive()
28 foo_dash1 = factory.makeSourcePackageRelease(archive=archive)33 foo_dash1 = factory.makeSourcePackageRelease(
29 foo_dash15 = factory.makeSourcePackageRelease(archive=archive)34 archive=archive, sourcepackagename=sourcepackagename)
35 foo_dash15 = factory.makeSourcePackageRelease(
36 archive=archive, sourcepackagename=sourcepackagename)
30 suite_dir = 'lib/lp/archiveuploader/tests/data/suite'37 suite_dir = 'lib/lp/archiveuploader/tests/data/suite'
31 files = {38 files = {
32 '%s/foo_1.0-1/foo_1.0-1.diff.gz' % suite_dir: None,39 '%s/foo_1.0-1/foo_1.0-1.diff.gz' % suite_dir: None,
@@ -156,3 +163,61 @@
156 [job] = IStore(Job).find(163 [job] = IStore(Job).find(
157 Job, Job.base_job_type == JobType.GENERATE_PACKAGE_DIFF)164 Job, Job.base_job_type == JobType.GENERATE_PACKAGE_DIFF)
158 self.assertIsNot(None, job)165 self.assertIsNot(None, job)
166
167 def test_packagediff_timeout(self):
168 # debdiff is killed after the time limit expires.
169 self.pushConfig("packagediff", debdiff_timeout=1)
170 temp_dir = self.makeTemporaryDirectory()
171 mock_debdiff_path = os.path.join(temp_dir, "debdiff")
172 marker_path = os.path.join(temp_dir, "marker")
173 with open(mock_debdiff_path, "w") as mock_debdiff:
174 print(dedent("""\
175 #! /bin/sh
176 (echo "$$"; echo "$TMPDIR") >%s
177 sleep 5
178 """ % marker_path), end="", file=mock_debdiff)
179 os.chmod(mock_debdiff_path, 0o755)
180 mock_path = "%s:%s" % (temp_dir, os.environ["PATH"])
181 diff = create_proper_job(self.factory)
182 with EnvironmentVariableFixture("PATH", mock_path):
183 diff.performDiff()
184 self.assertEqual(PackageDiffStatus.FAILED, diff.status)
185 with open(marker_path) as marker:
186 debdiff_pid = int(marker.readline())
187 debdiff_tmpdir = marker.readline().rstrip("\n")
188 err = self.assertRaises(OSError, os.kill, debdiff_pid, 0)
189 self.assertEqual(errno.ESRCH, err.errno)
190 self.assertFalse(os.path.exists(debdiff_tmpdir))
191
192 def test_packagediff_max_size(self):
193 # debdiff is killed if it generates more than the size limit.
194 self.pushConfig("packagediff", debdiff_max_size=1024)
195 temp_dir = self.makeTemporaryDirectory()
196 mock_debdiff_path = os.path.join(temp_dir, "debdiff")
197 marker_path = os.path.join(temp_dir, "marker")
198 with open(mock_debdiff_path, "w") as mock_debdiff:
199 print(dedent("""\
200 #! /bin/sh
201 (echo "$$"; echo "$TMPDIR") >%s
202 yes | head -n2048 || exit 2
203 sleep 5
204 """ % marker_path), end="", file=mock_debdiff)
205 os.chmod(mock_debdiff_path, 0o755)
206 mock_path = "%s:%s" % (temp_dir, os.environ["PATH"])
207 diff = create_proper_job(self.factory)
208 with EnvironmentVariableFixture("PATH", mock_path):
209 diff.performDiff()
210 self.assertEqual(PackageDiffStatus.FAILED, diff.status)
211 with open(marker_path) as marker:
212 debdiff_pid = int(marker.readline())
213 debdiff_tmpdir = marker.readline().rstrip("\n")
214 err = self.assertRaises(OSError, os.kill, debdiff_pid, 0)
215 self.assertEqual(errno.ESRCH, err.errno)
216 self.assertFalse(os.path.exists(debdiff_tmpdir))
217
218 def test_packagediff_blacklist(self):
219 # Package diff jobs for blacklisted package names do nothing.
220 self.pushConfig("packagediff", blacklist="udev cordova-cli")
221 diff = create_proper_job(self.factory, sourcepackagename="cordova-cli")
222 diff.performDiff()
223 self.assertEqual(PackageDiffStatus.FAILED, diff.status)