Comments and a question inline; modulo the answer to the question, I'm okay with this patch set. Thanks. On Tue, Sep 21, 2021 at 05:09:27AM -0000, Alex Murray wrote: > index 384fb7e..b3224c0 100755 > --- a/scripts/check-syntax > +++ b/scripts/check-syntax > @@ -812,23 +812,36 @@ for cve in args: > ) > cve_okay = False > > - # Check to make sure all patch references match the type:reference > - # pattern > - for key in data.keys(): > - if "Patches_" in key and len(data[key]) > 0: > - for line in re.split("\n", data[key]): > - patch_type = re.split(":", line)[0] > - if re.search("http", patch_type): > - print( > - "%s: %d: patch reference %s doesn't contain a type modifier (e.g. upstream:)" > - % (cvepath, srcmap[key] if key in srcmap else 1, key), > - file=sys.stderr, > - ) > - cve_okay = False > - if re.search("patch", patch_type): > + for pkg in data["patches"]: > + for index, value in enumerate(data["patches"][pkg]): > + patch_type, patch = data["patches"][pkg][index] > + # validate break-fix entries as 'I?hash|-|local-|URL' and > + # others should be a URL - but don't bother with retired > + # CVEs as these have a lot of old cruft > + if patch_type == "break-fix": > + try: > + bfre = "^(-|I?[a-f0-9]{1,40}|local-[A-Za-z0-9-]+|https?://.*)$" > + breaks, fixes = patch.split(' ', 1) > + # breaks and fixes can contain multiple entries separated by | > + for brk in breaks.split('|'): > + if re.match(bfre, brk) is None: > + raise ValueError("invalid break entry '%s':" % brk) > + # fixes can contain multiple entries separated by | > + for fix in fixes.split('|'): > + if re.match(bfre, fix) is None: > + raise ValueError("invalid fix entry '%s':" % fix) > + except Exception as e: > + print( > + "%s: %d: invalid break-fix entry: '%s': %s" > + % (cvepath, srcmap["patches"][pkg][index], patch, e), > + file=sys.stderr, > + ) > + cve_okay = False > + elif not "retired/" in cvepath: It would possibly be okay to check retired CVEs when --strict is passed. > + if "://" not in patch: > print( > - "%s: %d: invalid type modifier in %s, please use upstream:, vendor:, debdiff:, other:, etc." > - % (cvepath, srcmap[key] if key in srcmap else 1, key), > + "%s: %d: invalid patch URL '%s'" > + % (cvepath, srcmap["patches"][pkg][index], patch), > file=sys.stderr, > ) > cve_okay = False > diff --git a/scripts/cve_lib.py b/scripts/cve_lib.py > index c706a61..88dd326 100755 > --- a/scripts/cve_lib.py > +++ b/scripts/cve_lib.py > @@ -1195,6 +1195,8 @@ def load_cve(cve, strict=False, subprojects=list(), srcmap=None): > srcmap.setdefault('pkgs', dict()) > srcmap.setdefault('tags', dict()) > data.setdefault('tags', dict()) > + srcmap.setdefault('patches', dict()) > + data.setdefault('patches', dict()) > affected = dict() > non_ubuntu_affected = dict() > lastfield = None > @@ -1220,6 +1222,17 @@ def load_cve(cve, strict=False, subprojects=list(), srcmap=None): > code, newmsg = notes_parser.parse_line(cve, line, linenum, code) > if code != EXIT_OKAY: > msg += newmsg > + elif 'Patches_' in lastfield: > + try: > + _, pkg = lastfield.split('_', 1) > + patch_type, entry = line.split(':', 1) > + patch_type = patch_type.strip() > + entry = entry.strip() > + data['patches'][pkg].append((patch_type, entry)) > + srcmap['patches'][pkg].append(linenum) > + except Exception as e: > + msg += "%s: %d: Failed to parse '%s' entry %s: %s\n" % (cve, linenum, lastfield, line, e) > + code = EXIT_FAIL > elif lastfield == 'CVSS': > source, vector = line.split(':', 1) > try: > @@ -1280,15 +1293,19 @@ def load_cve(cve, strict=False, subprojects=list(), srcmap=None): > msg += "%s: %d: unknown Priority '%s'\n" % (cve, linenum, value) > code = EXIT_FAIL > elif 'Patches_' in field: > - '''These are raw fields''' > try: > foo, pkg = field.split('_', 1) > except ValueError: > msg += "%s: %d: bad field with 'Patches_': '%s'\n" % (cve, linenum, field) > code = EXIT_FAIL > continue > - data.setdefault(field, value) > - srcmap.setdefault(field, linenum) > + # value should be empty > + if len(value) > 0: > + msg += "%s: %d: '%s' field should have no value\n" % (cve, linenum, field) > + code = EXIT_FAIL > + continue > + data['patches'].setdefault(pkg, list()) > + srcmap['patches'].setdefault(pkg, list()) So one consequence here is that the patches gets populated with empty lists for every package in the CVE, whether it has a reference to a patch or not, unlike how tags are handled. For example, using Albert's example CVE in the motivating bug report, we get: In [1]: import cve_lib In [2]: cve_lib.load_cve('active/CVE-2012-4542') Out[2]: {'tags': {'linux-armadaxp': {'not-ue'}, 'linux-lts-quantal': {'not-ue'}, 'linux-lts-saucy': {'not-ue'}}, 'patches': {'linux': [('vendor', 'https://oss.oracle.com/git/gitweb.cgi?p=redpatch.git;a=commitdiff;h=76a274e17114abf1a77de6b651424648ce9e10c8'), ('vendor', 'http://rhn.redhat.com/errata/RHSA-2013-0496.html'), ('vendor', 'http://rhn.redhat.com/errata/RHSA-2013-0579.html'), ('vendor', 'http://rhn.redhat.com/errata/RHSA-2013-0882.html'), ('vendor', 'http://rhn.redhat.com/errata/RHSA-2013-0928.html')], 'linux-ec2': [], 'linux-mvl-dove': [], 'linux-ti-omap4': [], 'linux-lts-backport-maverick': [], 'linux-fsl-imx51': [], 'linux-lts-backport-oneiric': [], 'linux-linaro-omap': [], 'linux-linaro-shared': [], 'linux-linaro-vexpress': [], 'linux-qcm-msm': [], 'linux-armadaxp': [], [ rest elided ] Is that what we want? It makes programmatic manipulation later on easier (because the entry list will always exist to append to), but it also means your exporters have to check the length for each to see if there's anything to export. > elif 'Tags_' in field: > '''These are processed into the "tags" hash''' > try: > diff --git a/scripts/html_export.py b/scripts/html_export.py > index 066ffea..0253eaa 100755 > --- a/scripts/html_export.py > +++ b/scripts/html_export.py > @@ -272,16 +272,10 @@ def htmlize_cve(cvefile, outfd, commit=None): > print('', file=outfd) > print('', file=outfd) > > - patches = 'Patches_%s' % (pkg) > - if patches in data: > - entries = data[patches] > - if entries != "": > - print('
%s: | %s %s |
%s: | %s |