Merge ~twom/launchpad:oci-file-upload-deduplication into launchpad:master

Proposed by Tom Wardill
Status: Merged
Approved by: Tom Wardill
Approved revision: 296522e8a4ccb0dc96c6fa206f56f9c4663521df
Merge reported by: Otto Co-Pilot
Merged at revision: not available
Proposed branch: ~twom/launchpad:oci-file-upload-deduplication
Merge into: launchpad:master
Diff against target: 277 lines (+144/-4)
8 files modified
lib/lp/archiveuploader/ocirecipeupload.py (+12/-0)
lib/lp/archiveuploader/tests/test_ocirecipeupload.py (+28/-0)
lib/lp/oci/configure.zcml (+12/-0)
lib/lp/oci/interfaces/ocirecipebuild.py (+8/-0)
lib/lp/oci/model/ocirecipebuild.py (+11/-0)
lib/lp/oci/model/ocirecipebuildbehaviour.py (+11/-4)
lib/lp/oci/tests/test_ocirecipebuild.py (+27/-0)
lib/lp/oci/tests/test_ocirecipebuildbehaviour.py (+35/-0)
Reviewer Review Type Date Requested Status
Colin Watson Approve
Ioana Lasc Approve
Thiago F. Pappacena (community) Approve
Review via email: mp+383859@code.launchpad.net

Commit message

Reuse existing image layers in new uploads

Description of the change

Rather than uploading each layer for each image to the librarian, reuse any that are existing.

This could introduce a race condition with garbage collection, but as we don't currently have any, it's safe for now, but should be considered in the design for garbo.

To post a comment you must log in.
Revision history for this message
Thiago F. Pappacena (pappacena) wrote :

Looks good!

review: Approve
Revision history for this message
Ioana Lasc (ilasc) wrote :

All makes sense to me and looks good code-wise.

review: Approve
Revision history for this message
Colin Watson (cjwatson) :
review: Approve
Revision history for this message
Tom Wardill (twom) :
59920d5... by Tom Wardill

Drop security.py declaration in favour of allow

Revision history for this message
Colin Watson (cjwatson) :
296522e... by Tom Wardill

Fix comment

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1diff --git a/lib/lp/archiveuploader/ocirecipeupload.py b/lib/lp/archiveuploader/ocirecipeupload.py
2index c3db6cb..3ace0da 100644
3--- a/lib/lp/archiveuploader/ocirecipeupload.py
4+++ b/lib/lp/archiveuploader/ocirecipeupload.py
5@@ -17,6 +17,7 @@ from zope.component import getUtility
6
7 from lp.archiveuploader.utils import UploadError
8 from lp.buildmaster.enums import BuildStatus
9+from lp.oci.interfaces.ocirecipebuild import IOCIFileSet
10 from lp.services.helpers import filenameToContentType
11 from lp.services.librarian.interfaces import ILibraryFileAliasSet
12
13@@ -62,6 +63,17 @@ class OCIRecipeUpload:
14 "{}.tar.gz".format(layer_id)
15 )
16 self.logger.debug("Layer path: {}".format(layer_path))
17+ # If the file is already in the librarian,
18+ # we can just reuse it.
19+ existing_file = getUtility(IOCIFileSet).getByLayerDigest(
20+ digest)
21+ # XXX 2020-05-14 twom This will need to respect restricted
22+ # when we do private builds.
23+ if existing_file:
24+ build.addFile(
25+ existing_file.library_file,
26+ layer_file_digest=digest)
27+ continue
28 if not os.path.exists(layer_path):
29 raise UploadError(
30 "Missing layer file: {}.".format(layer_id))
31diff --git a/lib/lp/archiveuploader/tests/test_ocirecipeupload.py b/lib/lp/archiveuploader/tests/test_ocirecipeupload.py
32index bae4f4a..75e2455 100644
33--- a/lib/lp/archiveuploader/tests/test_ocirecipeupload.py
34+++ b/lib/lp/archiveuploader/tests/test_ocirecipeupload.py
35@@ -113,3 +113,31 @@ class TestOCIRecipeUploads(TestUploadProcessorBase):
36 "ERROR Missing layer file: layer_2.",
37 self.log.getLogBuffer())
38 self.assertFalse(self.build.verifySuccessfulUpload())
39+
40+ def test_reuse_existing_file(self):
41+ # The digests.json specifies a file that already exists in the
42+ # librarian, but not on disk
43+ self.assertFalse(self.build.verifySuccessfulUpload())
44+ del get_property_cache(self.build).manifest
45+ del get_property_cache(self.build).digests
46+ upload_dir = os.path.join(
47+ self.incoming_folder, "test", str(self.build.id), "ubuntu")
48+ write_file(os.path.join(upload_dir, "layer_1.tar.gz"), b"layer_1")
49+ write_file(os.path.join(upload_dir, "manifest.json"), b"manifest")
50+ write_file(
51+ os.path.join(upload_dir, "digests.json"), json.dumps(self.digests))
52+
53+ # create the existing file
54+ self.switchToAdmin()
55+ layer_2 = self.factory.makeOCIFile(layer_file_digest="digest_2")
56+ Store.of(layer_2.build).flush()
57+ self.switchToUploader()
58+
59+ handler = UploadHandler.forProcessor(
60+ self.uploadprocessor, self.incoming_folder, "test", self.build)
61+ result = handler.processOCIRecipe(self.log)
62+ self.assertEqual(
63+ UploadStatusEnum.ACCEPTED, result,
64+ "OCI upload failed\nGot: %s" % self.log.getLogBuffer())
65+ self.assertEqual(BuildStatus.FULLYBUILT, self.build.status)
66+ self.assertTrue(self.build.verifySuccessfulUpload())
67diff --git a/lib/lp/oci/configure.zcml b/lib/lp/oci/configure.zcml
68index 616fd2a..1f7a6c7 100644
69--- a/lib/lp/oci/configure.zcml
70+++ b/lib/lp/oci/configure.zcml
71@@ -97,6 +97,18 @@
72 factory="lp.oci.model.ocirecipebuildbehaviour.OCIRecipeBuildBehaviour"
73 permission="zope.Public" />
74
75+ <!-- OCIFile -->
76+ <class class="lp.oci.model.ocirecipebuild.OCIFile">
77+ <allow interface="lp.oci.interfaces.ocirecipebuild.IOCIFile" />
78+ </class>
79+
80+ <!-- OCIFileSet -->
81+ <securedutility
82+ class="lp.oci.model.ocirecipebuild.OCIFileSet"
83+ provides="lp.oci.interfaces.ocirecipebuild.IOCIFileSet">
84+ <allow interface="lp.oci.interfaces.ocirecipebuild.IOCIFileSet" />
85+ </securedutility>
86+
87 <!-- OCIRegistryCredentials -->
88 <class class="lp.oci.model.ociregistrycredentials.OCIRegistryCredentials">
89 <require
90diff --git a/lib/lp/oci/interfaces/ocirecipebuild.py b/lib/lp/oci/interfaces/ocirecipebuild.py
91index 1f00269..95539c0 100644
92--- a/lib/lp/oci/interfaces/ocirecipebuild.py
93+++ b/lib/lp/oci/interfaces/ocirecipebuild.py
94@@ -8,6 +8,7 @@ from __future__ import absolute_import, print_function, unicode_literals
95 __metaclass__ = type
96 __all__ = [
97 'IOCIFile',
98+ 'IOCIFileSet',
99 'IOCIRecipeBuild',
100 'IOCIRecipeBuildSet',
101 'OCIRecipeBuildRegistryUploadStatus',
102@@ -82,6 +83,13 @@ class OCIRecipeBuildRegistryUploadStatus(EnumeratedType):
103 """)
104
105
106+class IOCIFileSet(Interface):
107+ """A file artifact of an OCIRecipeBuild."""
108+
109+ def getByLayerDigest(layer_file_digest):
110+ """Return an `IOCIFile` with the matching layer_file_digest."""
111+
112+
113 class IOCIRecipeBuildView(IPackageBuild):
114 """`IOCIRecipeBuild` attributes that require launchpad.View permission."""
115
116diff --git a/lib/lp/oci/model/ocirecipebuild.py b/lib/lp/oci/model/ocirecipebuild.py
117index 6b8e023..94b7326 100644
118--- a/lib/lp/oci/model/ocirecipebuild.py
119+++ b/lib/lp/oci/model/ocirecipebuild.py
120@@ -43,6 +43,7 @@ from lp.buildmaster.model.packagebuild import PackageBuildMixin
121 from lp.oci.interfaces.ocirecipe import IOCIRecipeSet
122 from lp.oci.interfaces.ocirecipebuild import (
123 IOCIFile,
124+ IOCIFileSet,
125 IOCIRecipeBuild,
126 IOCIRecipeBuildSet,
127 OCIRecipeBuildRegistryUploadStatus,
128@@ -98,6 +99,16 @@ class OCIFile(Storm):
129 self.layer_file_digest = layer_file_digest
130
131
132+@implementer(IOCIFileSet)
133+class OCIFileSet:
134+
135+ def getByLayerDigest(self, layer_file_digest):
136+ return IStore(OCIFile).find(
137+ OCIFile,
138+ OCIFile.layer_file_digest == layer_file_digest).order_by(
139+ OCIFile.id).first()
140+
141+
142 @implementer(IOCIRecipeBuild)
143 class OCIRecipeBuild(PackageBuildMixin, Storm):
144
145diff --git a/lib/lp/oci/model/ocirecipebuildbehaviour.py b/lib/lp/oci/model/ocirecipebuildbehaviour.py
146index dbef696..6205cfd 100644
147--- a/lib/lp/oci/model/ocirecipebuildbehaviour.py
148+++ b/lib/lp/oci/model/ocirecipebuildbehaviour.py
149@@ -18,6 +18,7 @@ import json
150 import os
151
152 from twisted.internet import defer
153+from zope.component import getUtility
154 from zope.interface import implementer
155
156 from lp.app.errors import NotFoundError
157@@ -32,6 +33,7 @@ from lp.buildmaster.interfaces.buildfarmjobbehaviour import (
158 from lp.buildmaster.model.buildfarmjobbehaviour import (
159 BuildFarmJobBehaviourBase,
160 )
161+from lp.oci.interfaces.ocirecipebuild import IOCIFileSet
162 from lp.registry.interfaces.series import SeriesStatus
163 from lp.services.librarian.utils import copy_and_close
164 from lp.snappy.model.snapbuildbehaviour import SnapProxyMixin
165@@ -134,12 +136,17 @@ class OCIRecipeBuildBehaviour(SnapProxyMixin, BuildFarmJobBehaviourBase):
166 # This is in the form '<id>/layer.tar', we only need the first
167 layer_filename = "{}.tar.gz".format(layer_id.split('/')[0])
168 digest = digests_section[diff_id]['digest']
169- try:
170- _, librarian_file, _ = self.build.getLayerFileByDigest(
171- digest)
172- except NotFoundError:
173+ # Check if the file already exists in the librarian
174+ oci_file = getUtility(IOCIFileSet).getByLayerDigest(
175+ digest)
176+ if oci_file:
177+ librarian_file = oci_file.library_file
178+ # If it doesn't, we need to download it
179+ else:
180 files.add(layer_filename)
181 continue
182+ # If the file already exists, retrieve it from the librarian
183+ # so we can add it to the build artifacts
184 layer_path = os.path.join(upload_path, layer_filename)
185 librarian_file.open()
186 copy_and_close(librarian_file, open(layer_path, 'wb'))
187diff --git a/lib/lp/oci/tests/test_ocirecipebuild.py b/lib/lp/oci/tests/test_ocirecipebuild.py
188index 84a69f4..2ebffde 100644
189--- a/lib/lp/oci/tests/test_ocirecipebuild.py
190+++ b/lib/lp/oci/tests/test_ocirecipebuild.py
191@@ -29,6 +29,7 @@ from lp.oci.interfaces.ocirecipe import (
192 OCI_RECIPE_WEBHOOKS_FEATURE_FLAG,
193 )
194 from lp.oci.interfaces.ocirecipebuild import (
195+ IOCIFileSet,
196 IOCIRecipeBuild,
197 IOCIRecipeBuildSet,
198 )
199@@ -52,6 +53,32 @@ from lp.testing.layers import (
200 from lp.testing.matchers import HasQueryCount
201
202
203+class TestOCIFileSet(TestCaseWithFactory):
204+
205+ layer = LaunchpadZopelessLayer
206+
207+ def setUp(self):
208+ super(TestOCIFileSet, self).setUp()
209+ self.useFixture(FeatureFixture({OCI_RECIPE_ALLOW_CREATE: 'on'}))
210+
211+ def test_implements_interface(self):
212+ file_set = getUtility(IOCIFileSet)
213+ self.assertProvides(file_set, IOCIFileSet)
214+
215+ def test_getByLayerDigest(self):
216+ digest = "test_digest"
217+ oci_file = self.factory.makeOCIFile(layer_file_digest=digest)
218+ for _ in range(3):
219+ self.factory.makeOCIFile()
220+
221+ result = getUtility(IOCIFileSet).getByLayerDigest(digest)
222+ self.assertEqual(oci_file, result)
223+
224+ def test_getByLayerDigest_not_matching(self):
225+ result = getUtility(IOCIFileSet).getByLayerDigest("not existing")
226+ self.assertIsNone(result)
227+
228+
229 class TestOCIRecipeBuild(TestCaseWithFactory):
230
231 layer = LaunchpadZopelessLayer
232diff --git a/lib/lp/oci/tests/test_ocirecipebuildbehaviour.py b/lib/lp/oci/tests/test_ocirecipebuildbehaviour.py
233index 8536072..ee18740 100644
234--- a/lib/lp/oci/tests/test_ocirecipebuildbehaviour.py
235+++ b/lib/lp/oci/tests/test_ocirecipebuildbehaviour.py
236@@ -519,6 +519,41 @@ class TestHandleStatusForOCIRecipeBuild(MakeOCIBuildMixin,
237 self.assertEqual(contents, b'retrieved from librarian')
238
239 @defer.inlineCallbacks
240+ def test_handleStatus_OK_reuse_from_other_build(self):
241+ """We should be able to reuse a layer file from a separate build."""
242+ self.factory.makeOCIFile(
243+ layer_file_digest=u'digest_2',
244+ content="layer 2 retrieved from librarian")
245+
246+ now = datetime.now()
247+ mock_datetime = self.useFixture(MockPatch(
248+ 'lp.buildmaster.model.buildfarmjobbehaviour.datetime')).mock
249+ mock_datetime.now = lambda: now
250+ with dbuser(config.builddmaster.dbuser):
251+ yield self.behaviour.handleStatus(
252+ self.build.buildqueue_record, 'OK',
253+ {'filemap': self.filemap})
254+ self.assertEqual(
255+ ['buildlog', 'manifest_hash', 'digests_hash', 'config_1_hash'],
256+ self.slave._got_file_record)
257+ # This hash should not appear as it is already in the librarian
258+ self.assertNotIn('layer_1_hash', self.slave._got_file_record)
259+ self.assertNotIn('layer_2_hash', self.slave._got_file_record)
260+
261+ # layer_2 should have been retrieved from the librarian
262+ layer_2_path = os.path.join(
263+ self.upload_root,
264+ "incoming",
265+ self.behaviour.getUploadDirLeaf(self.build.build_cookie),
266+ str(self.build.archive.id),
267+ self.build.distribution.name,
268+ "layer_2.tar.gz"
269+ )
270+ with open(layer_2_path, 'rb') as layer_2_fp:
271+ contents = layer_2_fp.read()
272+ self.assertEqual(contents, b'layer 2 retrieved from librarian')
273+
274+ @defer.inlineCallbacks
275 def test_handleStatus_OK_absolute_filepath(self):
276
277 self._createTestFile(

Subscribers

People subscribed via source and target branches

to status/vote changes: