Merge lp:~cjwatson/launchpad/copy-lock-archive into lp:launchpad

Proposed by Colin Watson
Status: Rejected
Rejected by: Colin Watson
Proposed branch: lp:~cjwatson/launchpad/copy-lock-archive
Merge into: lp:launchpad
Diff against target: 136 lines (+57/-3)
3 files modified
lib/lp/services/database/locking.py (+5/-0)
lib/lp/soyuz/model/packagecopyjob.py (+11/-3)
lib/lp/soyuz/tests/test_packagecopyjob.py (+41/-0)
To merge this branch: bzr merge lp:~cjwatson/launchpad/copy-lock-archive
Reviewer Review Type Date Requested Status
William Grant Needs Information
Review via email: mp+279275@code.launchpad.net

Commit message

Take an advisory lock on the target archive when copying packages.

Description of the change

Take an advisory lock on the target archive when copying packages; otherwise it's possible for multiple copies of the same package into different series in the same archive to race with the conflict checker and create conflicting builds.

The main potential downside that I can think of is that a large mass sync into the Ubuntu archive could potentially result in quite a few copies hitting the lock and needing to be retried repeatedly, taking those jobs over the retry limit. I *think* that the ten-minute delay on retries means that it's unlikely we'll hit the retry limit in practice, but it's possible. I'd welcome advice on whether this is a problem and if so what we might do about it.

To post a comment you must log in.
Revision history for this message
William Grant (wgrant) wrote :

I think this will be a terrible thundering herd problem when bulk copies are performed to a single archive.

review: Needs Information
Revision history for this message
Colin Watson (cjwatson) wrote :

Unmerged revisions

17861. By Colin Watson

Take an advisory lock on the target archive when copying packages.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/lp/services/database/locking.py'
2--- lib/lp/services/database/locking.py 2015-03-16 16:22:19 +0000
3+++ lib/lp/services/database/locking.py 2015-12-02 13:59:35 +0000
4@@ -39,6 +39,11 @@
5 Git repository reference scan.
6 """)
7
8+ PACKAGE_COPY = DBItem(2, """Package copy.
9+
10+ Package copy.
11+ """)
12+
13
14 @contextmanager
15 def try_advisory_lock(lock_type, lock_id, store):
16
17=== modified file 'lib/lp/soyuz/model/packagecopyjob.py'
18--- lib/lp/soyuz/model/packagecopyjob.py 2015-07-09 20:06:17 +0000
19+++ lib/lp/soyuz/model/packagecopyjob.py 2015-12-02 13:59:35 +0000
20@@ -49,6 +49,11 @@
21 IMasterStore,
22 IStore,
23 )
24+from lp.services.database.locking import (
25+ AdvisoryLockHeld,
26+ LockType,
27+ try_advisory_lock,
28+ )
29 from lp.services.database.stormbase import StormBase
30 from lp.services.job.interfaces.job import JobStatus
31 from lp.services.job.model.job import (
32@@ -266,7 +271,7 @@
33 user_error_types = (CannotCopy,)
34 # Raised when closing bugs ends up hitting another process and
35 # deadlocking.
36- retry_error_types = (TransactionRollbackError,)
37+ retry_error_types = (TransactionRollbackError, AdvisoryLockHeld)
38 max_retries = 5
39
40 @classmethod
41@@ -575,7 +580,10 @@
42 def run(self):
43 """See `IRunnableJob`."""
44 try:
45- self.attemptCopy()
46+ with try_advisory_lock(
47+ LockType.PACKAGE_COPY, self.target_archive_id,
48+ IStore(Archive)):
49+ self.attemptCopy()
50 except CannotCopy as e:
51 # Remember the target archive purpose, as otherwise aborting the
52 # transaction will forget it.
53@@ -600,7 +608,7 @@
54 # the job. We will normally have a DistroSeriesDifference
55 # in this case.
56 pass
57- except SuspendJobException:
58+ except (SuspendJobException, AdvisoryLockHeld):
59 raise
60 except:
61 # Abort work done so far, but make sure that we commit the
62
63=== modified file 'lib/lp/soyuz/tests/test_packagecopyjob.py'
64--- lib/lp/soyuz/tests/test_packagecopyjob.py 2015-09-03 15:14:07 +0000
65+++ lib/lp/soyuz/tests/test_packagecopyjob.py 2015-12-02 13:59:35 +0000
66@@ -4,8 +4,11 @@
67 """Tests for sync package jobs."""
68
69 import operator
70+import os
71+import signal
72 from textwrap import dedent
73
74+from fixtures import FakeLogger
75 from storm.store import Store
76 from testtools.content import text_content
77 from testtools.matchers import (
78@@ -26,6 +29,10 @@
79 )
80 from lp.services.config import config
81 from lp.services.database.interfaces import IStore
82+from lp.services.database.locking import (
83+ LockType,
84+ try_advisory_lock,
85+ )
86 from lp.services.features.testing import FeatureFixture
87 from lp.services.job.interfaces.job import JobStatus
88 from lp.services.job.runner import JobRunner
89@@ -56,6 +63,7 @@
90 from lp.soyuz.interfaces.sourcepackageformat import (
91 ISourcePackageFormatSelectionSet,
92 )
93+from lp.soyuz.model.archive import Archive
94 from lp.soyuz.model.packagecopyjob import PackageCopyJob
95 from lp.soyuz.model.queue import PackageUpload
96 from lp.soyuz.tests.test_publishing import SoyuzTestPublisher
97@@ -326,6 +334,39 @@
98
99 self.assertRaises(Boom, job.run)
100
101+ def test_run_tries_advisory_lock(self):
102+ # A job is retried if an advisory lock for the same archive is held.
103+ logger = self.useFixture(FakeLogger())
104+ job = create_proper_job(self.factory)
105+ switch_dbuser(self.dbuser)
106+ # Fork so that we can take an advisory lock from a different
107+ # PostgreSQL session.
108+ read, write = os.pipe()
109+ pid = os.fork()
110+ if pid == 0: # child
111+ os.close(read)
112+ with try_advisory_lock(
113+ LockType.PACKAGE_COPY, job.target_archive_id,
114+ IStore(Archive)):
115+ os.write(write, b"1")
116+ try:
117+ signal.pause()
118+ except KeyboardInterrupt:
119+ pass
120+ os._exit(0)
121+ else: # parent
122+ try:
123+ os.close(write)
124+ os.read(read, 1)
125+ runner = JobRunner([job])
126+ runner.runAll()
127+ self.assertEqual(JobStatus.WAITING, job.status)
128+ self.assertEqual([], runner.oops_ids)
129+ self.assertIn(
130+ "Scheduling retry due to AdvisoryLockHeld", logger.output)
131+ finally:
132+ os.kill(pid, signal.SIGINT)
133+
134 def test_run_posts_copy_failure_as_comment(self):
135 # If the job fails with a CannotCopy exception, it swallows the
136 # exception and posts a DistroSeriesDifferenceComment with the