Merge ~mdeslaur/ubuntu-cve-tracker:check-cves-perf1 into ubuntu-cve-tracker:master

Proposed by Marc Deslauriers
Status: Merged
Merged at revision: eb8e65266e3f7a038f589060a9dff7be4cd69e0c
Proposed branch: ~mdeslaur/ubuntu-cve-tracker:check-cves-perf1
Merge into: ubuntu-cve-tracker:master
Diff against target: 82 lines (+17/-15)
1 file modified
scripts/check-cves (+17/-15)
Reviewer Review Type Date Requested Status
Steve Beattie Approve
Review via email: mp+462263@code.launchpad.net

Commit message

commit cb191ce1d077e41c34d4c27766e07bf97a54dc0a
Author: Marc Deslauriers <email address hidden>
Date: Tue Mar 12 15:02:34 2024 -0400

    check-cves: improve performance with sets instead of lists

    CVEIgnoreNotForUsList and CVEIgnoreMistriagedList were gigantic
    lists of CVEs and check-cves keeps searching for cves in them.
    Switching them to sets dramatically improves performance.

commit bcc2a35fad29a1dd31900e70ea4f44f38fd20f31
Author: Marc Deslauriers <email address hidden>
Date: Tue Mar 12 12:55:12 2024 -0400

    check-cves: simplify mistriaged() logic

    We don't need to call source_map.madison here and perform expensive
    searches, we just need to know if we can see this package in the
    source map. Bail out as soon as we do.

Description of the change

This merge proposal dramatically improves check-cves performance:

BEFORE:
$ time ./scripts/check-cves --import-missing-debian

real 3m17.930s
user 3m17.660s
sys 0m0.274s

AFTER:
$ time ./scripts/check-cves --import-missing-debian

real 0m5.845s
user 0m5.571s
sys 0m0.292s

To post a comment you must log in.
Revision history for this message
Steve Beattie (sbeattie) wrote :

Awesome catch, thanks! Merged.

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
diff --git a/scripts/check-cves b/scripts/check-cves
index 8a9e8fc..2abf4cc 100755
--- a/scripts/check-cves
+++ b/scripts/check-cves
@@ -223,22 +223,24 @@ def import_debian(handler):
223 today = datetime_date.today()223 today = datetime_date.today()
224 known = set(CVEKnownList + CVEIgnoreList)224 known = set(CVEKnownList + CVEIgnoreList)
225225
226 def ever_existed(pkg):
227 for rel in source:
228 if pkg in source[rel]:
229 return True
230 return False
231
226 def mistriaged(cve):232 def mistriaged(cve):
227 if cve in CVEIgnoreNotForUsList and \233 if cve in CVEIgnoreNotForUsSet and \
228 cve not in CVEIgnoreMistriagedList and \234 cve not in CVEIgnoreMistriagedSet and \
229 handler.debian[cve]['state'] == 'FOUND':235 handler.debian[cve]['state'] == 'FOUND':
230 # check that at least one of the assigned packages exist236 # check that at least one of the assigned packages exist
231 # in Ubuntu237 # in Ubuntu
232 pkgs = {}
233 for pkg in handler.debian[cve]['pkgs'].keys():238 for pkg in handler.debian[cve]['pkgs'].keys():
234 # ignore package if debian says is not affected239 # ignore package if debian says is not affected
235 if handler.debian[cve]['pkgs'][pkg]['state'] == '<not-affected>':240 if handler.debian[cve]['pkgs'][pkg]['state'] == '<not-affected>':
236 continue241 continue
237 answer = source_map.madison(source, pkg)242 if ever_existed(pkg):
238 if len(answer) > 0:243 return True
239 pkgs[pkg] = answer
240 if len(pkgs) > 0:
241 return True
242 return False244 return False
243245
244 # pull in CVEs from data/DSA/list246 # pull in CVEs from data/DSA/list
@@ -1347,7 +1349,7 @@ class CVEHandler(xml.sax.handler.ContentHandler):
1347 def add_cve(self, cve, packages, priority=None):1349 def add_cve(self, cve, packages, priority=None):
1348 # remove from not-for-us.txt if adding and ensure we remove any1350 # remove from not-for-us.txt if adding and ensure we remove any
1349 # mistriaged_hint from the description1351 # mistriaged_hint from the description
1350 if cve in CVEIgnoreNotForUsList:1352 if cve in CVEIgnoreNotForUsSet:
1351 cmd = ['sed', '-i', '/^%s #.*$/d' % cve, './ignored/not-for-us.txt']1353 cmd = ['sed', '-i', '/^%s #.*$/d' % cve, './ignored/not-for-us.txt']
1352 subprocess.call(cmd)1354 subprocess.call(cmd)
1353 self.cve_data[cve]['desc'] = self.cve_data[cve]['desc'].replace(mistriaged_hint, '')1355 self.cve_data[cve]['desc'] = self.cve_data[cve]['desc'].replace(mistriaged_hint, '')
@@ -1469,7 +1471,7 @@ class CVEHandler(xml.sax.handler.ContentHandler):
1469 def ignore_cve(self, cve, reason):1471 def ignore_cve(self, cve, reason):
1470 # Append to ignore list unless is already in CVEIgnoreList and then1472 # Append to ignore list unless is already in CVEIgnoreList and then
1471 # append to the ignored/ignore-mistriaged.txt1473 # append to the ignored/ignore-mistriaged.txt
1472 txtfile = 'ignore-mistriaged.txt' if cve in CVEIgnoreNotForUsList else 'not-for-us.txt'1474 txtfile = 'ignore-mistriaged.txt' if cve in CVEIgnoreNotForUsSet else 'not-for-us.txt'
1473 with open('%s/ignored/%s' % (destdir, txtfile), 'a') as f:1475 with open('%s/ignored/%s' % (destdir, txtfile), 'a') as f:
1474 f.write('%s # %s\n' % (cve, reason))1476 f.write('%s # %s\n' % (cve, reason))
14751477
@@ -1588,21 +1590,21 @@ class CheckCVETest(unittest.TestCase):
1588ignored_notforus_path = 'ignored/not-for-us.txt'1590ignored_notforus_path = 'ignored/not-for-us.txt'
1589if destdir != './' and destdir != '.':1591if destdir != './' and destdir != '.':
1590 ignored_notforus_path = os.path.join(destdir, ignored_notforus_path)1592 ignored_notforus_path = os.path.join(destdir, ignored_notforus_path)
1591# CVEIgnoreNotForUsList is a list of all CVEs that we have previously1593# CVEIgnoreNotForUsSet is a set of all CVEs that we have previously
1592# chosen to ignore since they don't apply to software in Ubuntu1594# chosen to ignore since they don't apply to software in Ubuntu
1593CVEIgnoreNotForUsList = cve_lib.parse_CVEs_from_uri(ignored_notforus_path)1595CVEIgnoreNotForUsSet = set(cve_lib.parse_CVEs_from_uri(ignored_notforus_path))
15941596
1595ignored_mistriaged_path = 'ignored/ignore-mistriaged.txt'1597ignored_mistriaged_path = 'ignored/ignore-mistriaged.txt'
1596if destdir != './' and destdir != '.':1598if destdir != './' and destdir != '.':
1597 ignored_mistriaged_path = os.path.join(destdir, ignored_mistriaged_path)1599 ignored_mistriaged_path = os.path.join(destdir, ignored_mistriaged_path)
1598# CVEIgnoreMistriagedList is a list of all CVEs that we want to definitely1600# CVEIgnoreMistriagedSet is a set of all CVEs that we want to definitely
1599# ignore when doing mistriaged CVE detection - they should exist in both1601# ignore when doing mistriaged CVE detection - they should exist in both
1600# CVEIgnoreNotForUsList and CVEIgnoreMistriagedList1602# CVEIgnoreNotForUsList and CVEIgnoreMistriagedList
1601CVEIgnoreMistriagedList = cve_lib.parse_CVEs_from_uri(ignored_mistriaged_path)1603CVEIgnoreMistriagedSet = set(cve_lib.parse_CVEs_from_uri(ignored_mistriaged_path))
16021604
1603# CVEIgnoreList is a list of all CVEs we know about already. These will be1605# CVEIgnoreList is a list of all CVEs we know about already. These will be
1604# ignored when checking MITRE for new CVEs1606# ignored when checking MITRE for new CVEs
1605CVEIgnoreList = list(CVEIgnoreNotForUsList)1607CVEIgnoreList = list(CVEIgnoreNotForUsSet)
16061608
1607CVEKnownList = []1609CVEKnownList = []
1608CVEKnownList += [cve for cve in os.listdir(destdir + "/ignored/") if cve.startswith('CVE-')]1610CVEKnownList += [cve for cve in os.listdir(destdir + "/ignored/") if cve.startswith('CVE-')]

Subscribers

People subscribed via source and target branches