Merge ~cjwatson/launchpad:artifactory-quoting into launchpad:master

Proposed by Colin Watson
Status: Merged
Approved by: Colin Watson
Approved revision: dfa04bec7fc1a79612b0a09def1540b675045930
Merge reported by: Otto Co-Pilot
Merged at revision: not available
Proposed branch: ~cjwatson/launchpad:artifactory-quoting
Merge into: launchpad:master
Diff against target: 186 lines (+109/-14)
3 files modified
lib/lp/archivepublisher/artifactory.py (+29/-8)
lib/lp/archivepublisher/tests/artifactory_fixture.py (+27/-6)
lib/lp/archivepublisher/tests/test_artifactory.py (+53/-0)
Reviewer Review Type Date Requested Status
Jürgen Gmach Approve
Review via email: mp+425378@code.launchpad.net

Commit message

Fix quoting of semicolons in Artifactory properties

Description of the change

Despite what the Artifactory documentation says, semicolons in properties must be quoted in order to avoid being confused with property separators. The exact observed behaviour of the implementation doesn't seem to be documented anywhere I've found, and it's quite strange in places, but I've done the best I could to make the test fixture agree with reality as closely as possible.

To post a comment you must log in.
Revision history for this message
Jürgen Gmach (jugmac00) wrote (last edit ):

I never heard the term "framing" for this kind of problems before. If time permits, maybe you can rephrase the explanation and just mention that separators must be escaped/quoted.

(Except this is a common term which I am unaware of :-) )

review: Approve
Revision history for this message
Colin Watson (cjwatson) wrote :

I think I got the "framing" term from wgrant. I've rephrased it to be more obvious.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1diff --git a/lib/lp/archivepublisher/artifactory.py b/lib/lp/archivepublisher/artifactory.py
2index e2fa1e8..611100c 100644
3--- a/lib/lp/archivepublisher/artifactory.py
4+++ b/lib/lp/archivepublisher/artifactory.py
5@@ -167,6 +167,14 @@ class ArtifactoryPoolEntry:
6 above example might end up as `"launchpad.channel": ["focal:stable",
7 "jammy:candidate"]`. This can easily be matched by AQL queries and
8 used to generate more specific repositories.
9+
10+ Note that all the property values here must be sorted lists, even if
11+ those are single-item lists. If we use simple strings, then things
12+ mostly work because `artifactory.encode_properties` will still
13+ encode them properly, but they won't match the values returned by
14+ the AQL search via `ArtifactoryPool.getAllArtifacts`, and so
15+ `updateProperties` will always try to update them even if there
16+ aren't really any changes.
17 """
18 properties = {}
19 properties["launchpad.release-id"] = [release_id]
20@@ -207,19 +215,19 @@ class ArtifactoryPoolEntry:
21 # version properties for them. Partially work around
22 # this by setting those properties manually (although
23 # this doesn't cause Sources files to exist).
24- properties["deb.name"] = self.source_name
25- properties["deb.version"] = self.source_version
26+ properties["deb.name"] = [self.source_name]
27+ properties["deb.version"] = [self.source_version]
28 # Debian-format source packages have a license path
29 # required by policy.
30- properties["soss.license"] = "debian/copyright"
31+ properties["soss.license"] = ["debian/copyright"]
32 elif release_id.startswith("binary:"):
33 # Debian-format binary packages have a license path
34 # required by policy (although examining it may require
35 # dereferencing symlinks).
36- properties["soss.license"] = (
37+ properties["soss.license"] = [
38 "/usr/share/doc/%s/copyright"
39 % publications[0].binary_package_name
40- )
41+ ]
42 else:
43 properties["launchpad.channel"] = sorted(
44 {
45@@ -236,10 +244,10 @@ class ArtifactoryPoolEntry:
46 if ci_build is not None:
47 properties.update(
48 {
49- "soss.source_url": (
50+ "soss.source_url": [
51 ci_build.git_repository.getCodebrowseUrl()
52- ),
53- "soss.commit_id": ci_build.commit_sha1,
54+ ],
55+ "soss.commit_id": [ci_build.commit_sha1],
56 }
57 )
58 return properties
59@@ -292,6 +300,19 @@ class ArtifactoryPoolEntry:
60 and key not in self.owned_properties
61 }
62 new_properties.update(properties)
63+ # https://www.jfrog.com/confluence/display/JFROG/Artifactory+REST+API
64+ # claims that ",", backslash, "|", and "=" require quoting with a
65+ # leading URL-encoded backslash. The artifactory module quotes ",",
66+ # "|", and "=" (and handles URL-encoding at a lower layer). In
67+ # practice, backslash must apparently not be quoted, and ";" must be
68+ # quoted as otherwise ";" within items will be confused with a
69+ # property separator. This is not very satisfactory as we're
70+ # essentially playing core wars with the artifactory module, but
71+ # this solves the problem for now.
72+ new_properties = {
73+ key: [v.replace(";", r"\;") for v in value]
74+ for key, value in new_properties.items()
75+ }
76 if old_properties != new_properties:
77 # We could use the ArtifactoryPath.properties setter, but that
78 # will fetch the old properties again when we already have them
79diff --git a/lib/lp/archivepublisher/tests/artifactory_fixture.py b/lib/lp/archivepublisher/tests/artifactory_fixture.py
80index c642031..93bcf41 100644
81--- a/lib/lp/archivepublisher/tests/artifactory_fixture.py
82+++ b/lib/lp/archivepublisher/tests/artifactory_fixture.py
83@@ -112,15 +112,36 @@ class FakeArtifactoryFixture(Fixture):
84 else:
85 return 404, {}, "Unable to find item"
86
87+ def _split(self, delimiter, text):
88+ """Split text on a delimiter the way that Artifactory does.
89+
90+ We need to skip delimiters quoted by a leading backslash.
91+ """
92+ assert delimiter in ",|=;"
93+ return re.findall(r"((?:\\[,|=;]|[^%s])+)" % delimiter, text)
94+
95+ def _unquote(self, text):
96+ """Unquote something the way that Artifactory does.
97+
98+ This is poorly-documented.
99+ https://www.jfrog.com/confluence/display/JFROG/Artifactory+REST+API
100+ says "In order to supply special characters (comma (,), backslash
101+ (\\), pipe (|), equals (=)) as key/value you must add an encoded
102+ backslash (%5C) before them"; but in practice semicolon also needs
103+ to be quoted in this way to avoid being confused with a property
104+ separator, and quoting a backslash like this simply results in a
105+ double backslash.
106+ """
107+ return re.sub(r"\\([,|=;])", r"\1", unquote(text))
108+
109 def _decode_properties(self, encoded):
110 """Decode properties that were encoded as part of a request."""
111 properties = {}
112- for param in encoded.split(";"):
113- key, value = param.split("=", 1)
114- # XXX cjwatson 2022-04-19: Check whether this unquoting approach
115- # actually matches the artifactory module and Artifactory
116- # itself.
117- properties[unquote(key)] = [unquote(v) for v in value.split(",")]
118+ for param in self._split(";", encoded):
119+ key, value = re.match(r"((?:\\[,|=;]|[^=])+)=(.*)", param).groups()
120+ properties[self._unquote(key)] = [
121+ self._unquote(v) for v in self._split(",", value)
122+ ]
123 return properties
124
125 def _handle_upload(self, request):
126diff --git a/lib/lp/archivepublisher/tests/test_artifactory.py b/lib/lp/archivepublisher/tests/test_artifactory.py
127index c89e462..0e013e5 100644
128--- a/lib/lp/archivepublisher/tests/test_artifactory.py
129+++ b/lib/lp/archivepublisher/tests/test_artifactory.py
130@@ -897,3 +897,56 @@ class TestArtifactoryPoolFromLibrarian(TestCaseWithFactory):
131 },
132 path.properties,
133 )
134+
135+ def test_updateProperties_encodes_special_characters(self):
136+ pool = self.makePool(ArchiveRepositoryFormat.PYTHON)
137+ ds = self.factory.makeDistroSeries(
138+ distribution=pool.archive.distribution
139+ )
140+ das = self.factory.makeDistroArchSeries(distroseries=ds)
141+ spr = self.factory.makeSourcePackageRelease(
142+ archive=pool.archive,
143+ sourcepackagename="foo",
144+ version="1.0",
145+ format=SourcePackageType.SDIST,
146+ )
147+ bpph = self.factory.makeBinaryPackagePublishingHistory(
148+ archive=pool.archive,
149+ distroarchseries=das,
150+ pocket=PackagePublishingPocket.RELEASE,
151+ component="main",
152+ source_package_release=spr,
153+ binarypackagename="foo",
154+ binpackageformat=BinaryPackageFormat.WHL,
155+ architecturespecific=False,
156+ channel="edge",
157+ )
158+ bpr = bpph.binarypackagerelease
159+ bpf = self.factory.makeBinaryPackageFile(
160+ binarypackagerelease=bpr,
161+ library_file=self.factory.makeLibraryFileAlias(
162+ filename="foo-1.0-py3-none-any.whl"
163+ ),
164+ filetype=BinaryPackageFileType.WHL,
165+ )
166+ bpphs = [bpph]
167+ transaction.commit()
168+ pool.addFile(
169+ None, bpr.sourcepackagename, bpr.sourcepackageversion, bpf
170+ )
171+ path = pool.rootpath / "foo" / "1.0" / "foo-1.0-py3-none-any.whl"
172+ self.assertTrue(path.exists())
173+ self.assertFalse(path.is_symlink())
174+ # Simulate Artifactory scanning the package.
175+ self.artifactory._fs["/foo/1.0/foo-1.0-py3-none-any.whl"][
176+ "properties"
177+ ]["pypi.summary"] = ["text with special characters: ;=|,\\"]
178+
179+ pool.updateProperties(
180+ bpr.sourcepackagename, bpr.sourcepackageversion, [bpf], bpphs
181+ )
182+
183+ self.assertEqual(
184+ ["text with special characters: ;=|,\\"],
185+ path.properties["pypi.summary"],
186+ )

Subscribers

People subscribed via source and target branches

to status/vote changes: