Merge lp:~brian-murray/apport/contents-mapping into lp:~apport-hackers/apport/trunk

Proposed by Brian Murray
Status: Merged
Merged at revision: 3211
Proposed branch: lp:~brian-murray/apport/contents-mapping
Merge into: lp:~apport-hackers/apport/trunk
Diff against target: 148 lines (+75/-30)
1 file modified
backends/packaging-apt-dpkg.py (+75/-30)
To merge this branch: bzr merge lp:~brian-murray/apport/contents-mapping
Reviewer Review Type Date Requested Status
Dimitri John Ledkov (community) Approve
Apport upstream developers Pending
Review via email: mp+356447@code.launchpad.net

Description of the change

This modifies apport-retrace so that it stops searching each Contents.gz files for every file that appears in ProcMaps until it finds a match. Instead a global mapping of files to packages is created and saved to disk so that subsequent file lookups and retraces are quicker.

Its worth noting that not every file in Contents.gz is added to the mapping because not every file which exists in ProcMaps is utilized as a search criteria for Contents.gz. You can see the filtering in the needed_runtime_packages() function - https://bazaar.launchpad.net/~apport-hackers/apport/trunk/view/head:/apport/sandboxutils.py#L81

I've tested it with sample crashes from every supported release of Ubuntu without issue.

To post a comment you must log in.
3209. By Brian Murray

merge with upstream

Revision history for this message
Dimitri John Ledkov (xnox) wrote :

I hope this works correctly with our "old" and "new" Contents files (with/without headers, ubuntu-only/debian-like) -> the hardcoded checks of 33 lines is ok, as long as we don't re-publish release pocket...

I wonder if this will at all work when we enable usr-merge, as Contents will still list "old" locations, which actually will be installed/loaded from new locations. But this is good, cause we will be able to create additional contents_mappings for alternative paths (as in /bin/bash and /usr/bin/bash become equivalent).

The pickle loading appears to be safe, as the mapping is only user-writable by default (including when using a tempdir).

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'backends/packaging-apt-dpkg.py'
2--- backends/packaging-apt-dpkg.py 2018-10-10 18:10:01 +0000
3+++ backends/packaging-apt-dpkg.py 2018-10-10 23:47:01 +0000
4@@ -50,8 +50,10 @@
5 self._contents_dir = None
6 self._mirror = None
7 self._virtual_mapping_obj = None
8+ self._contents_mapping_obj = None
9 self._launchpad_base = 'https://api.launchpad.net/devel'
10 self._ppa_archive_url = self._launchpad_base + '/~%(user)s/+archive/%(distro)s/%(ppaname)s'
11+ self._contents_update = False
12
13 def __del__(self):
14 try:
15@@ -79,6 +81,25 @@
16 with open(mapping_file, 'wb') as fp:
17 pickle.dump(self._virtual_mapping_obj, fp)
18
19+ def _contents_mapping(self, configdir, arch):
20+ if self._contents_mapping_obj is not None:
21+ return self._contents_mapping_obj
22+
23+ mapping_file = os.path.join(configdir, 'contents_mapping-%s.pickle' % arch)
24+ if os.path.exists(mapping_file):
25+ with open(mapping_file, 'rb') as fp:
26+ self._contents_mapping_obj = pickle.load(fp)
27+ else:
28+ self._contents_mapping_obj = {}
29+
30+ return self._contents_mapping_obj
31+
32+ def _save_contents_mapping(self, configdir, arch):
33+ mapping_file = os.path.join(configdir, 'contents_mapping-%s.pickle' % arch)
34+ if self._contents_mapping_obj is not None:
35+ with open(mapping_file, 'wb') as fp:
36+ pickle.dump(self._contents_mapping_obj, fp)
37+
38 def _cache(self):
39 '''Return apt.Cache() (initialized lazily).'''
40
41@@ -1224,12 +1245,14 @@
42 release = self.get_distro_codename()
43 else:
44 release = self._distro_release_to_codename(release)
45- # search -proposed last since file may be provided by a new package
46- for pocket in ['-updates', '-security', '', '-proposed']:
47- map = os.path.join(dir, '%s%s-Contents-%s.gz' % (release, pocket, arch))
48-
49- # check if map exists and is younger than a day; if not, we need to
50- # refresh it
51+ # this is ordered by likelihood of installation with the most common
52+ # last
53+ for pocket in ['-proposed', '', '-security', '-updates']:
54+ map = os.path.join(dir, '%s%s-Contents-%s.gz' % \
55+ (release, pocket, arch))
56+ # check if map exists and is younger than a day; if not, we need
57+ # to refresh it
58+ update = False
59 try:
60 st = os.stat(map)
61 age = int(time.time() - st.st_mtime)
62@@ -1263,6 +1286,7 @@
63 else:
64 update = True
65 if update:
66+ self._contents_update = True
67 try:
68 src = urlopen(url)
69 except IOError:
70@@ -1282,33 +1306,54 @@
71 src.close()
72 assert os.path.exists(map)
73
74- if file.startswith('/'):
75- file = file[1:]
76-
77- # zgrep is magnitudes faster than a 'gzip.open/split() loop'
78- package = None
79- try:
80- zgrep = subprocess.Popen(['zgrep', '-m1', '^%s[[:space:]]' % file, map],
81- stdout=subprocess.PIPE, stderr=subprocess.PIPE)
82- out = zgrep.communicate()[0].decode('UTF-8')
83- except OSError as e:
84- if e.errno != errno.ENOMEM:
85- raise
86- file_b = file.encode()
87+ contents_mapping = self._contents_mapping(dir, arch)
88+ # if the mapping is empty build it
89+ if not contents_mapping:
90+ self._contents_update = True
91+ # if any of the Contents files were updated we need to update the
92+ # map because the ordering in which is created is important
93+ if self._contents_update:
94 import gzip
95 with gzip.open('%s' % map, 'rb') as contents:
96- out = ''
97+ line_num = 0
98 for line in contents:
99- if line.startswith(file_b):
100- out = line
101- break
102- # we do not check the return code, since zgrep -m1 often errors out
103- # with 'stdout: broken pipe'. The contents file can contain files
104- # with spaces in the name so use the part after the last space.
105- if out:
106- package = out.split()[-1].split(',')[0].split('/')[-1]
107- if package:
108- return package
109+ line_num += 1
110+ # the first 32 lines are descriptive only for these
111+ # releases
112+ if pocket == '' and release in ['trusty', 'xenial'] \
113+ and line_num < 33:
114+ continue
115+ path = line.split()[0]
116+ if path.split(b'/')[0] == b'usr' and \
117+ path.split(b'/')[1] in (b'lib', b'bin', b'sbin'):
118+ package = line.split()[-1].split(b',')[0].split(b'/')[-1]
119+ elif path.split(b'/')[0] in (b'lib', b'bin', b'sbin'):
120+ package = line.split()[-1].split(b',')[0].split(b'/')[-1]
121+ else:
122+ continue
123+ if path in contents_mapping:
124+ if package == contents_mapping[path]:
125+ continue
126+ else:
127+ # if the package was updated use the update
128+ # b/c everyone should have packages from
129+ # -updates and -security installed
130+ contents_mapping[path] = package
131+ else:
132+ contents_mapping[path] = package
133+ # the file only needs to be saved after an update
134+ if self._contents_update:
135+ self._save_contents_mapping(dir, arch)
136+ # the update of the mapping only needs to be done once
137+ self._contents_update = False
138+ if file.startswith('/'):
139+ file = file[1:]
140+ file = file.encode()
141+ try:
142+ pkg = contents_mapping[file].decode()
143+ return pkg
144+ except KeyError:
145+ pass
146 return None
147
148 @classmethod

Subscribers

People subscribed via source and target branches