Merge ~alexmurray/ubuntu-cve-tracker:nvd-json into ubuntu-cve-tracker:master
- Git
- lp:~alexmurray/ubuntu-cve-tracker
- nvd-json
- Merge into master
Proposed by
Alex Murray
Status: | Merged |
---|---|
Merged at revision: | 8dea86e836bbdb0e1b1e81d29c41c5ffcc6f0ab1 |
Proposed branch: | ~alexmurray/ubuntu-cve-tracker:nvd-json |
Merge into: | ubuntu-cve-tracker:master |
Diff against target: |
433 lines (+111/-130) 3 files modified
.gitignore (+1/-0) scripts/check-cves (+96/-116) scripts/process_cves (+14/-14) |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Alex Murray | Approve | ||
Review via email: mp+372964@code.launchpad.net |
Commit message
Description of the change
To post a comment you must log in.
Revision history for this message
Alex Murray (alexmurray) wrote : | # |
Revision history for this message
Alex Murray (alexmurray) wrote : | # |
Approving, as there has been no feedback in nearly two weeks.
review:
Approve
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | diff --git a/.gitignore b/.gitignore |
2 | index 766252e..ebc4960 100644 |
3 | --- a/.gitignore |
4 | +++ b/.gitignore |
5 | @@ -2,6 +2,7 @@ |
6 | .*.swp |
7 | allitems.xml* |
8 | nvdcve*.xml* |
9 | +nvdcve*.json* |
10 | database.pickle* |
11 | database-all.pickle* |
12 | embargoed |
13 | diff --git a/scripts/check-cves b/scripts/check-cves |
14 | index 44e814b..bb77a33 100755 |
15 | --- a/scripts/check-cves |
16 | +++ b/scripts/check-cves |
17 | @@ -15,6 +15,7 @@ |
18 | from __future__ import print_function |
19 | |
20 | import datetime |
21 | +import json |
22 | import optparse |
23 | import os |
24 | import os.path |
25 | @@ -253,53 +254,40 @@ class PercentageFile(object): |
26 | return self.f.close() |
27 | |
28 | |
29 | -def get_fake_nvd_xml_header(): |
30 | - '''Generate NVD header xml''' |
31 | - today = "%s-%s-%s" % (datetime.date.today().year, |
32 | - datetime.date.today().month, |
33 | - datetime.date.today().day) |
34 | - return '''<?xml version='1.0' encoding='UTF-8'?> |
35 | -<nvd xmlns="http://nvd.nist.gov/feeds/cve/1.2" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" pub_date="%s" xsi:schemaLocation="http://nvd.nist.gov/feeds/cve/1.2 http://nvd.nist.gov/schema/nvdcve.xsd" nvd_xml_version="1.2"> |
36 | -''' % (today) |
37 | - |
38 | - |
39 | -def get_fake_nvd_xml_footer(): |
40 | - '''Generate NVD footer xml''' |
41 | - return '''</nvd>\n''' |
42 | - |
43 | - |
44 | -def convert_to_xml(cves, cve, desc): |
45 | - s = ''' <entry type="CVE" severity="Medium" seq="%s" published="%s" name="%s" modified="%s" CVSS_version="2.0" CVSS_vector="(AV:N/AC:M/Au:N/C:N/I:P/A:P)" CVSS_score="0.0" CVSS_impact_subscore="0.0" CVSS_exploit_subscore="0.0" CVSS_base_score="0.0"> |
46 | - <desc> |
47 | - <descript source="cve">%s</descript> |
48 | - </desc> |
49 | - <loss_types /> |
50 | - <range /> |
51 | -''' % (re.sub(r'CVE\-', '', cve), |
52 | - cves[cve]['date'].strftime("%Y-%m-%d"), |
53 | - cve, |
54 | - cves[cve]['date'].strftime("%Y-%m-%d"), |
55 | - desc) |
56 | - |
57 | - if cve in cves and 'refs' in cves[cve] and len(cves[cve]['refs']) > 0: |
58 | - s += ' <refs>\n' |
59 | - for r in cves[cve]['refs']: |
60 | - s += '<ref url="%s" source="MISC">%s</ref>\n' % (r, r) |
61 | - s += ' </refs>\n' |
62 | - else: |
63 | - s += ' <refs />\n' |
64 | +def convert_to_nvd(cves=[], desc=""): |
65 | + # convert to nvd format dict (like nvd json) |
66 | + nvd = {"CVE_data_type": "CVE", |
67 | + "CVE_data_format": "MITRE", |
68 | + "CVE_data_version": "4.0", |
69 | + "CVE_data_timestamp": datetime.datetime.utcnow().strftime("%Y-%m-%dT%H:%M:%SZ"), |
70 | + "CVE_Items": []} |
71 | |
72 | - s += ''' <vuln_soft /> |
73 | - </entry> |
74 | -''' |
75 | - return s |
76 | + keys = cves.keys() |
77 | + keys.sort() |
78 | + for cve in keys: |
79 | + refs = [] |
80 | + if cve in cves and 'refs' in cves[cve] and len(cves[cve]['refs']) > 0: |
81 | + for r in cves[cve]['refs']: |
82 | + refs.append({"name": r, |
83 | + "url": r, |
84 | + "refsource": "MISC"}) |
85 | + nvd["CVE_Items"].append({"cve": |
86 | + {"data_type": "CVE", |
87 | + "data_format": "MITRE", |
88 | + "data_version": "4.0", |
89 | + "CVE_data_meta": {"ID": cve}, |
90 | + "references": {"reference_data": refs}, |
91 | + "description": {"description_data": [ |
92 | + {"lang": "en", |
93 | + "value": desc(cve) if desc else ""} |
94 | + ]}}, |
95 | + "publishedDate": cves[cve]['date'].strftime("%Y-%m-%dT%H:%M:%SZ")}) |
96 | + return nvd |
97 | |
98 | |
99 | def import_debian(handler): |
100 | '''Import Debian CVEs and DSAs missing from the tracker''' |
101 | cves = dict() |
102 | - cve = None |
103 | - xml = get_fake_nvd_xml_header() |
104 | |
105 | today = datetime.date.today() |
106 | known = set(CVEKnownList + CVEIgnoreList) |
107 | @@ -370,15 +358,9 @@ def import_debian(handler): |
108 | if opt.verbose: |
109 | print("Processing %s: %s (%s)" % (cve, handler.debian[cve]['desc'], cves[cve]['date']), file=sys.stderr) |
110 | |
111 | - keys = cves.keys() |
112 | - keys.sort() |
113 | - for c in keys: |
114 | - xml += convert_to_xml(cves, c, cves[c]['subject']) |
115 | - |
116 | - xml += get_fake_nvd_xml_footer() |
117 | - |
118 | - tmp, tmpname = tempfile.mkstemp(prefix='debian-import_') |
119 | - os.write(tmp, xml) |
120 | + nvd = convert_to_nvd(cves, lambda cve: cves[cve]['subject']) |
121 | + tmp, tmpname = tempfile.mkstemp(prefix='debian-import_', suffix='.json') |
122 | + os.write(tmp, json.dumps(nvd)) |
123 | os.close(tmp) |
124 | |
125 | return tmpname |
126 | @@ -386,14 +368,14 @@ def import_debian(handler): |
127 | |
128 | def read_locate_cves_output(f): |
129 | '''Read in output of UCT/scripts/locate_cves.py |
130 | - This is sneaky because we read in the output and then output a fake XML |
131 | + This is sneaky because we read in the output and then output a fake JSON |
132 | file for processing. |
133 | ''' |
134 | if not os.path.isfile(f): |
135 | print("'%s' not a file" % f, file=sys.stderr) |
136 | sys.exit(1) |
137 | |
138 | - name = os.path.abspath(f + ".xml") |
139 | + name = os.path.abspath(f + ".json") |
140 | if os.path.exists(name): |
141 | print("'%s' already exists" % name, file=sys.stderr) |
142 | sys.exit(1) |
143 | @@ -459,18 +441,11 @@ def read_locate_cves_output(f): |
144 | url = "http://www.openwall.com/lists/oss-security/%s" % (cves[cve]['date'].strftime("%Y/%m/%d")) |
145 | cves[cve].setdefault('refs', [] + [url]) |
146 | |
147 | - xml = get_fake_nvd_xml_header() |
148 | - |
149 | - keys = cves.keys() |
150 | - keys.sort() |
151 | - for c in keys: |
152 | - desc = '''ML-Date: %s, ML-Subject: %s''' % (cves[c]['date'], escape(cves[c]['subject'])) |
153 | - xml += convert_to_xml(cves, c, desc) |
154 | - |
155 | - xml += get_fake_nvd_xml_footer() |
156 | + nvd = convert_to_nvd(cves, lambda c: '''ML-Date: %s, ML-Subject: %s''' % |
157 | + (cves[c]['date'], escape(cves[c]['subject']))) |
158 | |
159 | tmp, tmpname = tempfile.mkstemp() |
160 | - os.write(tmp, xml) |
161 | + os.write(tmp, json.dumps(nvd)) |
162 | |
163 | os.close(tmp) |
164 | shutil.move(tmpname, name) |
165 | @@ -492,9 +467,9 @@ def read_mbox_file(f): |
166 | os.write(tmp, out) |
167 | os.close(tmp) |
168 | |
169 | - xml_file = read_locate_cves_output(tmpname) |
170 | + json_file = read_locate_cves_output(tmpname) |
171 | os.unlink(tmpname) |
172 | - return xml_file |
173 | + return json_file |
174 | |
175 | |
176 | def dpkg_compare_versions(v1, op, v2): |
177 | @@ -551,7 +526,7 @@ class CVEHandler(xml.sax.handler.ContentHandler): |
178 | self.debian = None |
179 | |
180 | # File-type detection |
181 | - self.use_nvd = None |
182 | + |
183 | |
184 | # Load debian CVE states, if configured |
185 | if 'secure_testing_path' in cve_lib.config: |
186 | @@ -573,43 +548,53 @@ class CVEHandler(xml.sax.handler.ContentHandler): |
187 | print("---------------------------") |
188 | print("%5d total CVEs triaged" % (self.num_added + self.num_ignored)) |
189 | |
190 | - def startElement(self, name, attrs): |
191 | - # NVD items |
192 | - if name == "entry": |
193 | - # Detect broken XML |
194 | - if self.use_nvd is not None and not self.use_nvd: |
195 | - raise KeyError("saw 'entry' in Mitre XML") |
196 | - if self.use_nvd is None and opt.verbose: |
197 | - print("Detected NVD XML schema", file=sys.stderr) |
198 | - self.use_nvd = True |
199 | - self.curr_cve = attrs['name'] |
200 | - self.curr_public = attrs['published'] |
201 | + def parse_json(self, fp): |
202 | + template_nvd = {"CVE_data_type": "CVE", |
203 | + "CVE_data_format": "MITRE", |
204 | + "CVE_data_version": "4.0"} |
205 | + nvd = json.load(fp) |
206 | + # check for expected fields |
207 | + for key in template_nvd.keys(): |
208 | + if key not in nvd or nvd[key] != template_nvd[key]: |
209 | + raise KeyError("NVD JSON in '%s' seems invalid" % fp.name) |
210 | + for item in nvd["CVE_Items"]: |
211 | + template_cve = {"data_type": "CVE", |
212 | + "data_format": "MITRE", |
213 | + "data_version": "4.0"} |
214 | + if "publishedDate" in item: |
215 | + # convert from YYYY-MM-DDTHH:MM:SSZ to YYYY-MM-DD HH:MM:SS UTC |
216 | + self.curr_public = item["publishedDate"].replace("T", " ").replace("Z", ":00 UTC") |
217 | + else: |
218 | + self.curr_public = None |
219 | + cve = item["cve"] |
220 | + for key in template_cve.keys(): |
221 | + if key not in cve or cve[key] != template_cve[key]: |
222 | + raise KeyError("NVD JSON in '%s' seems invalid" % fp.name) |
223 | + |
224 | + metadata = cve["CVE_data_meta"] |
225 | + self.curr_cve = metadata["ID"] |
226 | self.curr_refs = [] |
227 | - self.curr_url = None |
228 | self.curr_desc = None |
229 | - self.curr_desc_ready = False |
230 | - if name == "descript": |
231 | - self.curr_chars_collect = True |
232 | - self.curr_chars = "" |
233 | - if attrs['source'] == 'cve': |
234 | - self.curr_desc_ready = True |
235 | - # Mitre items |
236 | + for ref in cve["references"]["reference_data"]: |
237 | + self.curr_refs += [(ref["refsource"], ref["name"], ref["url"])] |
238 | + # find an english description |
239 | + for desc in cve["description"]["description_data"]: |
240 | + if desc["lang"] == "en": |
241 | + self.curr_desc = desc["value"] |
242 | + self.handle_cve() |
243 | + |
244 | + |
245 | + def startElement(self, name, attrs): |
246 | if name == "item": |
247 | - # Detect broken XML |
248 | - if self.use_nvd is not None and self.use_nvd: |
249 | - raise KeyError("saw 'item' in NVD XML") |
250 | - if self.use_nvd is None and opt.verbose: |
251 | - print("Detected Mitre XML schema", file=sys.stderr) |
252 | - self.use_nvd = False |
253 | + if opt.verbose: |
254 | + print("Parsing Mitre XML schema", file=sys.stderr) |
255 | self.curr_cve = attrs['name'] |
256 | self.curr_refs = [] |
257 | self.curr_url = None |
258 | self.curr_desc = None |
259 | - if name == "desc" and self.use_nvd is False: |
260 | + if name == "desc": |
261 | self.curr_chars_collect = True |
262 | self.curr_chars = "" |
263 | - |
264 | - # Common |
265 | if name == "ref": |
266 | self.curr_chars_collect = True |
267 | self.curr_chars = "" |
268 | @@ -626,23 +611,15 @@ class CVEHandler(xml.sax.handler.ContentHandler): |
269 | |
270 | def endElement(self, name): |
271 | self.curr_chars_collect = False |
272 | - # NVD items |
273 | - if name == "descript" and self.curr_desc_ready: |
274 | - self.curr_desc = self.curr_chars.encode("ascii", "replace") |
275 | - self.curr_desc_ready = False |
276 | - if name == "entry": |
277 | - self.handle_cve() |
278 | # Mitre items |
279 | - if name == "desc" and not self.use_nvd: |
280 | + if name == "desc": |
281 | self.curr_desc = self.curr_chars.encode("ascii", "replace") |
282 | if name == "item": |
283 | self.handle_cve() |
284 | - # Common |
285 | if name == "ref": |
286 | self.curr_refs += [(self.curr_source, self.curr_chars.encode("ascii", "replace"), self.curr_url)] |
287 | |
288 | def handle_cve(self): |
289 | - |
290 | # Skip CVEs we know about already |
291 | if self.curr_cve in self.cve_ignore: |
292 | return |
293 | @@ -1347,19 +1324,19 @@ if opt.test: |
294 | unittest.TextTestRunner(verbosity=2).run(suite) |
295 | sys.exit(0) |
296 | |
297 | -untriaged_xml = "" |
298 | +untriaged_json = "" |
299 | if opt.untriaged: |
300 | - untriaged_xml = read_locate_cves_output(opt.untriaged) |
301 | - args.append(untriaged_xml) |
302 | + untriaged_json = read_locate_cves_output(opt.untriaged) |
303 | + args.append(untriaged_json) |
304 | |
305 | if opt.mbox: |
306 | - untriaged_xml = read_mbox_file(opt.mbox) |
307 | - args.append(untriaged_xml) |
308 | + untriaged_json = read_mbox_file(opt.mbox) |
309 | + args.append(untriaged_json) |
310 | |
311 | -debian_import_xml = "" |
312 | +debian_import_json = "" |
313 | if opt.import_missing_debian and handler.debian is not None: |
314 | - debian_import_xml = import_debian(handler) |
315 | - args.append(debian_import_xml) |
316 | + debian_import_json = import_debian(handler) |
317 | + args.append(debian_import_json) |
318 | |
319 | if len(args) == 0: |
320 | args.append("https://cve.mitre.org/cve/downloads/allitems.xml") |
321 | @@ -1371,18 +1348,21 @@ for uri in args: |
322 | else: |
323 | readable = PercentageFile(uri) |
324 | try: |
325 | - parser.parse(readable) |
326 | + if uri.endswith("json"): |
327 | + handler.parse_json(readable) |
328 | + else: |
329 | + parser.parse(readable) |
330 | except xml.sax._exceptions.SAXParseException as e: |
331 | print("\n\nWARNING: %s is malformed:\n%s" % (uri, e)) |
332 | print("Aborting", file=sys.stderr) |
333 | sys.exit(1) |
334 | print('') |
335 | |
336 | -# Leaving our fake xml around is icky |
337 | -if os.path.exists(untriaged_xml): |
338 | - os.unlink(untriaged_xml) |
339 | -if os.path.exists(debian_import_xml): |
340 | - os.unlink(debian_import_xml) |
341 | +# Leaving our fake json around is icky |
342 | +if os.path.exists(untriaged_json): |
343 | + os.unlink(untriaged_json) |
344 | +if os.path.exists(debian_import_json): |
345 | + os.unlink(debian_import_json) |
346 | |
347 | if opt.refresh: |
348 | for cve in sorted(CVEKnownList): |
349 | diff --git a/scripts/process_cves b/scripts/process_cves |
350 | index fc16316..752e255 100755 |
351 | --- a/scripts/process_cves |
352 | +++ b/scripts/process_cves |
353 | @@ -29,8 +29,8 @@ download() { |
354 | url="$1" |
355 | # if http, then download it with wget, otherwise, try to rsync it |
356 | if is_http_url "$url" ; then |
357 | - # Force the gzipped XML to be downloaded |
358 | - # Required for nvdcve-recent.xml as of 2015-10-16 |
359 | + # Force the gzipped JSON to be downloaded |
360 | + # Required for nvdcve-1.1-recent.json as of 2015-10-16 |
361 | url="${url}.gz" |
362 | file=$(basename "$url") |
363 | |
364 | @@ -45,16 +45,16 @@ download() { |
365 | } |
366 | |
367 | download_yearly_files() { |
368 | - base_url="${nvd_loc:-https://nvd.nist.gov/download}/" |
369 | + base_url="${nvd_loc:-https://nvd.nist.gov/feeds/json/cve/1.1}/" |
370 | # if http, download each file sequentially, otherwise, use glob with rsync |
371 | if is_http_url "$base_url" ; then |
372 | current_year=$(date +%Y) |
373 | for year in $(seq 2004 "$current_year"); do |
374 | - download "${base_url}nvdcve-${year}.xml" |
375 | + download "${base_url}nvdcve-1.1-${year}.json" |
376 | done |
377 | else |
378 | # TODO: Make Y3K compliant |
379 | - download "${base_url}nvdcve-2*.xml" |
380 | + download "${base_url}nvdcve-1.1-2*.json" |
381 | fi |
382 | } |
383 | |
384 | @@ -89,13 +89,13 @@ update_debian() { |
385 | update_files() { |
386 | download "${mitre_loc:-https://cve.mitre.org/data/downloads}/allitems.xml" |
387 | download_yearly_files |
388 | - download "https://nvd.nist.gov/download/nvdcve-recent.xml" |
389 | + download "https://nvd.nist.gov/feeds/json/cve/1.1/nvdcve-1.1-recent.json" |
390 | } |
391 | |
392 | full_refresh() { |
393 | # get PubDate |
394 | - echo "update PubDate (check-cves --refresh nvdcve-*.xml) ..." |
395 | - ./scripts/check-cves --refresh nvdcve-*.xml |
396 | + echo "update PubDate (check-cves --refresh nvdcve-*.json) ..." |
397 | + ./scripts/check-cves --refresh nvdcve-*.json |
398 | |
399 | # get authoritative descriptions |
400 | echo "get authoritative descriptions (check-cves --refresh ./allitems.xml) ..." |
401 | @@ -216,8 +216,8 @@ case "$action" in |
402 | |
403 | # kernel_team_merge |
404 | echo "Skipping merge from kernel team" |
405 | - echo "check-cves nvdcve-*.xml" |
406 | - ./scripts/check-cves nvdcve-*.xml |
407 | + echo "check-cves nvdcve-*.json" |
408 | + ./scripts/check-cves nvdcve-*.json |
409 | process_missing_debian |
410 | |
411 | check_syntax |
412 | @@ -232,8 +232,8 @@ case "$action" in |
413 | echo "Skipping merge from kernel team" |
414 | |
415 | # process new ones |
416 | - echo "check-cves ./nvdcve-recent.xml" |
417 | - ./scripts/check-cves ./nvdcve-recent.xml |
418 | + echo "check-cves ./nvdcve-1.1-recent.json" |
419 | + ./scripts/check-cves ./nvdcve-1.1-recent.json |
420 | echo "check-cves ./allitems.xml" |
421 | ./scripts/check-cves ./allitems.xml |
422 | |
423 | @@ -282,8 +282,8 @@ case "$action" in |
424 | update_files |
425 | # kernel_team_merge |
426 | echo "Skipping merge from kernel team" |
427 | - echo "check-cves ./nvdcve-recent.xml" |
428 | - ./scripts/check-cves ./nvdcve-recent.xml |
429 | + echo "check-cves ./nvdcve-1.1-recent.json" |
430 | + ./scripts/check-cves ./nvdcve-1.1-recent.json |
431 | process_missing_debian |
432 | check_syntax |
433 | nag_about_mailing_lists |
FYI - I plan to merge this on Wednesday if there is no feedback by then.