Merge ~alexmurray/ubuntu-cve-tracker:nvd-json into ubuntu-cve-tracker:master

Proposed by Alex Murray
Status: Merged
Merged at revision: 8dea86e836bbdb0e1b1e81d29c41c5ffcc6f0ab1
Proposed branch: ~alexmurray/ubuntu-cve-tracker:nvd-json
Merge into: ubuntu-cve-tracker:master
Diff against target: 433 lines (+111/-130)
3 files modified
.gitignore (+1/-0)
scripts/check-cves (+96/-116)
scripts/process_cves (+14/-14)
Reviewer | Review Type | Date Requested | Status
Alex Murray | | | Approve
Review via email: mp+372964@code.launchpad.net
To post a comment you must log in.
Revision history for this message
Alex Murray (alexmurray) wrote :

FYI - I plan to merge this on Wednesday if there is no feedback by then.

Revision history for this message
Alex Murray (alexmurray) wrote :

Approving, as there has been no feedback in nearly two weeks.

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1diff --git a/.gitignore b/.gitignore
2index 766252e..ebc4960 100644
3--- a/.gitignore
4+++ b/.gitignore
5@@ -2,6 +2,7 @@
6 .*.swp
7 allitems.xml*
8 nvdcve*.xml*
9+nvdcve*.json*
10 database.pickle*
11 database-all.pickle*
12 embargoed
13diff --git a/scripts/check-cves b/scripts/check-cves
14index 44e814b..bb77a33 100755
15--- a/scripts/check-cves
16+++ b/scripts/check-cves
17@@ -15,6 +15,7 @@
18 from __future__ import print_function
19
20 import datetime
21+import json
22 import optparse
23 import os
24 import os.path
25@@ -253,53 +254,40 @@ class PercentageFile(object):
26 return self.f.close()
27
28
29-def get_fake_nvd_xml_header():
30- '''Generate NVD header xml'''
31- today = "%s-%s-%s" % (datetime.date.today().year,
32- datetime.date.today().month,
33- datetime.date.today().day)
34- return '''<?xml version='1.0' encoding='UTF-8'?>
35-<nvd xmlns="http://nvd.nist.gov/feeds/cve/1.2" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" pub_date="%s" xsi:schemaLocation="http://nvd.nist.gov/feeds/cve/1.2 http://nvd.nist.gov/schema/nvdcve.xsd" nvd_xml_version="1.2">
36-''' % (today)
37-
38-
39-def get_fake_nvd_xml_footer():
40- '''Generate NVD footer xml'''
41- return '''</nvd>\n'''
42-
43-
44-def convert_to_xml(cves, cve, desc):
45- s = ''' <entry type="CVE" severity="Medium" seq="%s" published="%s" name="%s" modified="%s" CVSS_version="2.0" CVSS_vector="(AV:N/AC:M/Au:N/C:N/I:P/A:P)" CVSS_score="0.0" CVSS_impact_subscore="0.0" CVSS_exploit_subscore="0.0" CVSS_base_score="0.0">
46- <desc>
47- <descript source="cve">%s</descript>
48- </desc>
49- <loss_types />
50- <range />
51-''' % (re.sub(r'CVE\-', '', cve),
52- cves[cve]['date'].strftime("%Y-%m-%d"),
53- cve,
54- cves[cve]['date'].strftime("%Y-%m-%d"),
55- desc)
56-
57- if cve in cves and 'refs' in cves[cve] and len(cves[cve]['refs']) > 0:
58- s += ' <refs>\n'
59- for r in cves[cve]['refs']:
60- s += '<ref url="%s" source="MISC">%s</ref>\n' % (r, r)
61- s += ' </refs>\n'
62- else:
63- s += ' <refs />\n'
64+def convert_to_nvd(cves=[], desc=""):
65+ # convert to nvd format dict (like nvd json)
66+ nvd = {"CVE_data_type": "CVE",
67+ "CVE_data_format": "MITRE",
68+ "CVE_data_version": "4.0",
69+ "CVE_data_timestamp": datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"),
70+ "CVE_Items": []}
71
72- s += ''' <vuln_soft />
73- </entry>
74-'''
75- return s
76+ keys = cves.keys()
77+ keys.sort()
78+ for cve in keys:
79+ refs = []
80+ if cve in cves and 'refs' in cves[cve] and len(cves[cve]['refs']) > 0:
81+ for r in cves[cve]['refs']:
82+ refs.append({"name": r,
83+ "url": r,
84+ "refsource": "MISC"})
85+ nvd["CVE_Items"].append({"cve":
86+ {"data_type": "CVE",
87+ "data_format": "MITRE",
88+ "data_version": "4.0",
89+ "CVE_data_meta": {"ID": cve},
90+ "references": {"reference_data": refs},
91+ "description": {"description_data": [
92+ {"lang": "en",
93+ "value": desc(cve) if desc else ""}
94+ ]}},
95+ "publishedDate": cves[cve]['date'].strftime("%Y-%m-%dT%H:%M:%SZ")})
96+ return nvd
97
98
99 def import_debian(handler):
100 '''Import Debian CVEs and DSAs missing from the tracker'''
101 cves = dict()
102- cve = None
103- xml = get_fake_nvd_xml_header()
104
105 today = datetime.date.today()
106 known = set(CVEKnownList + CVEIgnoreList)
107@@ -370,15 +358,9 @@ def import_debian(handler):
108 if opt.verbose:
109 print("Processing %s: %s (%s)" % (cve, handler.debian[cve]['desc'], cves[cve]['date']), file=sys.stderr)
110
111- keys = cves.keys()
112- keys.sort()
113- for c in keys:
114- xml += convert_to_xml(cves, c, cves[c]['subject'])
115-
116- xml += get_fake_nvd_xml_footer()
117-
118- tmp, tmpname = tempfile.mkstemp(prefix='debian-import_')
119- os.write(tmp, xml)
120+ nvd = convert_to_nvd(cves, lambda cve: cves[cve]['subject'])
121+ tmp, tmpname = tempfile.mkstemp(prefix='debian-import_', suffix='.json')
122+ os.write(tmp, json.dumps(nvd))
123 os.close(tmp)
124
125 return tmpname
126@@ -386,14 +368,14 @@ def import_debian(handler):
127
128 def read_locate_cves_output(f):
129 '''Read in output of UCT/scripts/locate_cves.py
130- This is sneaky because we read in the output and then output a fake XML
131+ This is sneaky because we read in the output and then output a fake JSON
132 file for processing.
133 '''
134 if not os.path.isfile(f):
135 print("'%s' not a file" % f, file=sys.stderr)
136 sys.exit(1)
137
138- name = os.path.abspath(f + ".xml")
139+ name = os.path.abspath(f + ".json")
140 if os.path.exists(name):
141 print("'%s' already exists" % name, file=sys.stderr)
142 sys.exit(1)
143@@ -459,18 +441,11 @@ def read_locate_cves_output(f):
144 url = "http://www.openwall.com/lists/oss-security/%s" % (cves[cve]['date'].strftime("%Y/%m/%d"))
145 cves[cve].setdefault('refs', [] + [url])
146
147- xml = get_fake_nvd_xml_header()
148-
149- keys = cves.keys()
150- keys.sort()
151- for c in keys:
152- desc = '''ML-Date: %s, ML-Subject: %s''' % (cves[c]['date'], escape(cves[c]['subject']))
153- xml += convert_to_xml(cves, c, desc)
154-
155- xml += get_fake_nvd_xml_footer()
156+ nvd = convert_to_nvd(cves, lambda c: '''ML-Date: %s, ML-Subject: %s''' %
157+ (cves[c]['date'], escape(cves[c]['subject'])))
158
159 tmp, tmpname = tempfile.mkstemp()
160- os.write(tmp, xml)
161+ os.write(tmp, json.dumps(nvd))
162
163 os.close(tmp)
164 shutil.move(tmpname, name)
165@@ -492,9 +467,9 @@ def read_mbox_file(f):
166 os.write(tmp, out)
167 os.close(tmp)
168
169- xml_file = read_locate_cves_output(tmpname)
170+ json_file = read_locate_cves_output(tmpname)
171 os.unlink(tmpname)
172- return xml_file
173+ return json_file
174
175
176 def dpkg_compare_versions(v1, op, v2):
177@@ -551,7 +526,7 @@ class CVEHandler(xml.sax.handler.ContentHandler):
178 self.debian = None
179
180 # File-type detection
181- self.use_nvd = None
182+
183
184 # Load debian CVE states, if configured
185 if 'secure_testing_path' in cve_lib.config:
186@@ -573,43 +548,53 @@ class CVEHandler(xml.sax.handler.ContentHandler):
187 print("---------------------------")
188 print("%5d total CVEs triaged" % (self.num_added + self.num_ignored))
189
190- def startElement(self, name, attrs):
191- # NVD items
192- if name == "entry":
193- # Detect broken XML
194- if self.use_nvd is not None and not self.use_nvd:
195- raise KeyError("saw 'entry' in Mitre XML")
196- if self.use_nvd is None and opt.verbose:
197- print("Detected NVD XML schema", file=sys.stderr)
198- self.use_nvd = True
199- self.curr_cve = attrs['name']
200- self.curr_public = attrs['published']
201+ def parse_json(self, fp):
202+ template_nvd = {"CVE_data_type": "CVE",
203+ "CVE_data_format": "MITRE",
204+ "CVE_data_version": "4.0"}
205+ nvd = json.load(fp)
206+ # check for expected fields
207+ for key in template_nvd.keys():
208+ if key not in nvd or nvd[key] != template_nvd[key]:
209+ raise KeyError("NVD JSON in '%s' seems invalid" % fp.name)
210+ for item in nvd["CVE_Items"]:
211+ template_cve = {"data_type": "CVE",
212+ "data_format": "MITRE",
213+ "data_version": "4.0"}
214+ if "publishedDate" in item:
215+ # convert from YYYY-MM-DDTHH:MM:SSZ to YYYY-MM-DD HH:MM:SS UTC
216+ self.curr_public = item["publishedDate"].replace("T", " ").replace("Z", ":00 UTC")
217+ else:
218+ self.curr_public = None
219+ cve = item["cve"]
220+ for key in template_cve.keys():
221+ if key not in cve or cve[key] != template_cve[key]:
222+ raise KeyError("NVD JSON in '%s' seems invalid" % fp.name)
223+
224+ metadata = cve["CVE_data_meta"]
225+ self.curr_cve = metadata["ID"]
226 self.curr_refs = []
227- self.curr_url = None
228 self.curr_desc = None
229- self.curr_desc_ready = False
230- if name == "descript":
231- self.curr_chars_collect = True
232- self.curr_chars = ""
233- if attrs['source'] == 'cve':
234- self.curr_desc_ready = True
235- # Mitre items
236+ for ref in cve["references"]["reference_data"]:
237+ self.curr_refs += [(ref["refsource"], ref["name"], ref["url"])]
238+ # find an english description
239+ for desc in cve["description"]["description_data"]:
240+ if desc["lang"] == "en":
241+ self.curr_desc = desc["value"]
242+ self.handle_cve()
243+
244+
245+ def startElement(self, name, attrs):
246 if name == "item":
247- # Detect broken XML
248- if self.use_nvd is not None and self.use_nvd:
249- raise KeyError("saw 'item' in NVD XML")
250- if self.use_nvd is None and opt.verbose:
251- print("Detected Mitre XML schema", file=sys.stderr)
252- self.use_nvd = False
253+ if opt.verbose:
254+ print("Parsing Mitre XML schema", file=sys.stderr)
255 self.curr_cve = attrs['name']
256 self.curr_refs = []
257 self.curr_url = None
258 self.curr_desc = None
259- if name == "desc" and self.use_nvd is False:
260+ if name == "desc":
261 self.curr_chars_collect = True
262 self.curr_chars = ""
263-
264- # Common
265 if name == "ref":
266 self.curr_chars_collect = True
267 self.curr_chars = ""
268@@ -626,23 +611,15 @@ class CVEHandler(xml.sax.handler.ContentHandler):
269
270 def endElement(self, name):
271 self.curr_chars_collect = False
272- # NVD items
273- if name == "descript" and self.curr_desc_ready:
274- self.curr_desc = self.curr_chars.encode("ascii", "replace")
275- self.curr_desc_ready = False
276- if name == "entry":
277- self.handle_cve()
278 # Mitre items
279- if name == "desc" and not self.use_nvd:
280+ if name == "desc":
281 self.curr_desc = self.curr_chars.encode("ascii", "replace")
282 if name == "item":
283 self.handle_cve()
284- # Common
285 if name == "ref":
286 self.curr_refs += [(self.curr_source, self.curr_chars.encode("ascii", "replace"), self.curr_url)]
287
288 def handle_cve(self):
289-
290 # Skip CVEs we know about already
291 if self.curr_cve in self.cve_ignore:
292 return
293@@ -1347,19 +1324,19 @@ if opt.test:
294 unittest.TextTestRunner(verbosity=2).run(suite)
295 sys.exit(0)
296
297-untriaged_xml = ""
298+untriaged_json = ""
299 if opt.untriaged:
300- untriaged_xml = read_locate_cves_output(opt.untriaged)
301- args.append(untriaged_xml)
302+ untriaged_json = read_locate_cves_output(opt.untriaged)
303+ args.append(untriaged_json)
304
305 if opt.mbox:
306- untriaged_xml = read_mbox_file(opt.mbox)
307- args.append(untriaged_xml)
308+ untriaged_json = read_mbox_file(opt.mbox)
309+ args.append(untriaged_json)
310
311-debian_import_xml = ""
312+debian_import_json = ""
313 if opt.import_missing_debian and handler.debian is not None:
314- debian_import_xml = import_debian(handler)
315- args.append(debian_import_xml)
316+ debian_import_json = import_debian(handler)
317+ args.append(debian_import_json)
318
319 if len(args) == 0:
320 args.append("https://cve.mitre.org/cve/downloads/allitems.xml")
321@@ -1371,18 +1348,21 @@ for uri in args:
322 else:
323 readable = PercentageFile(uri)
324 try:
325- parser.parse(readable)
326+ if uri.endswith("json"):
327+ handler.parse_json(readable)
328+ else:
329+ parser.parse(readable)
330 except xml.sax._exceptions.SAXParseException as e:
331 print("\n\nWARNING: %s is malformed:\n%s" % (uri, e))
332 print("Aborting", file=sys.stderr)
333 sys.exit(1)
334 print('')
335
336-# Leaving our fake xml around is icky
337-if os.path.exists(untriaged_xml):
338- os.unlink(untriaged_xml)
339-if os.path.exists(debian_import_xml):
340- os.unlink(debian_import_xml)
341+# Leaving our fake json around is icky
342+if os.path.exists(untriaged_json):
343+ os.unlink(untriaged_json)
344+if os.path.exists(debian_import_json):
345+ os.unlink(debian_import_json)
346
347 if opt.refresh:
348 for cve in sorted(CVEKnownList):
349diff --git a/scripts/process_cves b/scripts/process_cves
350index fc16316..752e255 100755
351--- a/scripts/process_cves
352+++ b/scripts/process_cves
353@@ -29,8 +29,8 @@ download() {
354 url="$1"
355 # if http, then download it with wget, otherwise, try to rsync it
356 if is_http_url "$url" ; then
357- # Force the gzipped XML to be downloaded
358- # Required for nvdcve-recent.xml as of 2015-10-16
359+ # Force the gzipped JSON to be downloaded
360+ # Required for nvdcve-1.1-recent.json as of 2015-10-16
361 url="${url}.gz"
362 file=$(basename "$url")
363
364@@ -45,16 +45,16 @@ download() {
365 }
366
367 download_yearly_files() {
368- base_url="${nvd_loc:-https://nvd.nist.gov/download}/"
369+ base_url="${nvd_loc:-https://nvd.nist.gov/feeds/json/cve/1.1}/"
370 # if http, download each file sequentially, otherwise, use glob with rsync
371 if is_http_url "$base_url" ; then
372 current_year=$(date +%Y)
373 for year in $(seq 2004 "$current_year"); do
374- download "${base_url}nvdcve-${year}.xml"
375+ download "${base_url}nvdcve-1.1-${year}.json"
376 done
377 else
378 # TODO: Make Y3K compliant
379- download "${base_url}nvdcve-2*.xml"
380+ download "${base_url}nvdcve-1.1-2*.json"
381 fi
382 }
383
384@@ -89,13 +89,13 @@ update_debian() {
385 update_files() {
386 download "${mitre_loc:-https://cve.mitre.org/data/downloads}/allitems.xml"
387 download_yearly_files
388- download "https://nvd.nist.gov/download/nvdcve-recent.xml"
389+ download "https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-recent.json"
390 }
391
392 full_refresh() {
393 # get PubDate
394- echo "update PubDate (check-cves --refresh nvdcve-*.xml) ..."
395- ./scripts/check-cves --refresh nvdcve-*.xml
396+ echo "update PubDate (check-cves --refresh nvdcve-*.json) ..."
397+ ./scripts/check-cves --refresh nvdcve-*.json
398
399 # get authoritative descriptions
400 echo "get authoritative descriptions (check-cves --refresh ./allitems.xml) ..."
401@@ -216,8 +216,8 @@ case "$action" in
402
403 # kernel_team_merge
404 echo "Skipping merge from kernel team"
405- echo "check-cves nvdcve-*.xml"
406- ./scripts/check-cves nvdcve-*.xml
407+ echo "check-cves nvdcve-*.json"
408+ ./scripts/check-cves nvdcve-*.json
409 process_missing_debian
410
411 check_syntax
412@@ -232,8 +232,8 @@ case "$action" in
413 echo "Skipping merge from kernel team"
414
415 # process new ones
416- echo "check-cves ./nvdcve-recent.xml"
417- ./scripts/check-cves ./nvdcve-recent.xml
418+ echo "check-cves ./nvdcve-1.1-recent.json"
419+ ./scripts/check-cves ./nvdcve-1.1-recent.json
420 echo "check-cves ./allitems.xml"
421 ./scripts/check-cves ./allitems.xml
422
423@@ -282,8 +282,8 @@ case "$action" in
424 update_files
425 # kernel_team_merge
426 echo "Skipping merge from kernel team"
427- echo "check-cves ./nvdcve-recent.xml"
428- ./scripts/check-cves ./nvdcve-recent.xml
429+ echo "check-cves ./nvdcve-1.1-recent.json"
430+ ./scripts/check-cves ./nvdcve-1.1-recent.json
431 process_missing_debian
432 check_syntax
433 nag_about_mailing_lists

Subscribers

People subscribed via source and target branches