Merge ~mdeslaur/ubuntu-cve-tracker:convert-pickle-py3 into ubuntu-cve-tracker:master

Proposed by Marc Deslauriers
Status: Merged
Merge reported by: Marc Deslauriers
Merged at revision: 8a188cdb6b6d66abadbe316f0b77a658010d6c02
Proposed branch: ~mdeslaur/ubuntu-cve-tracker:convert-pickle-py3
Merge into: ubuntu-cve-tracker:master
Diff against target: 229 lines (+139/-32)
2 files modified
scripts/convert-pickle.py (+19/-32)
scripts/test_convert_pickle.py (+120/-0)
Reviewer Review Type Date Requested Status
Alex Murray Approve
Review via email: mp+462233@code.launchpad.net

Commit message

commit 881703b3dc9ba3eabcab51fc6105c2d38933e55a
Author: Marc Deslauriers <email address hidden>
Date: Tue Mar 12 09:57:49 2024 -0400

    scripts/convert-pickle.py: convert to python 3

commit 41d0f307086483417748c0327a21a0dd80a9ef3e
Author: Marc Deslauriers <email address hidden>
Date: Tue Mar 12 09:52:48 2024 -0400

    scripts/test_convert_pickle.py: add test for convert-pickle.py

Description of the change

This merge proposal converts convert-pickle.py to Python 3 and removes the special handling for old Python 2 versions, which slowed down execution.

To post a comment you must log in.
8a188cd... by Marc Deslauriers

test_convert_pickle.py: clarify comment

Revision history for this message
Alex Murray (alexmurray) wrote :

LGTM - but one thing, can you please update .launchpad.yaml to include your new test script? Thanks.

review: Approve
Revision history for this message
Steve Beattie (sbeattie) wrote :

On Tue, Mar 12, 2024 at 11:42:21PM -0000, Alex Murray wrote:
> Review: Approve
>
> LGTM - but one thing, can you please update .launchpad.yaml to include your new test script? Thanks.

I'm planning to land my changes that adjust lpci / make check-python to
run pytest-3 against scripts/test_*.py so they will automatically get
picked up.

--
Steve Beattie
<email address hidden>

Revision history for this message
Marc Deslauriers (mdeslaur) wrote :

I've pushed this as-is so as not to conflict with Steve's pending merge, which will pick it up automatically.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
diff --git a/scripts/convert-pickle.py b/scripts/convert-pickle.py
index 88258de..2eec39e 100755
--- a/scripts/convert-pickle.py
+++ b/scripts/convert-pickle.py
@@ -1,14 +1,13 @@
1#!/usr/bin/env python21#!/usr/bin/env python3
22
3# Author: Jamie Strandboge <jamie@ubuntu.com>3# Author: Jamie Strandboge <jamie@ubuntu.com>
4# Copyright (C) 2012 Canonical Ltd.4# Copyright (C) 2012-2024 Canonical Ltd.
5#5#
6# This script is distributed under the terms and conditions of the GNU General6# This script is distributed under the terms and conditions of the GNU General
7# Public License, Version 2 or later. See http://www.gnu.org/copyleft/gpl.html7# Public License, Version 2 or later. See http://www.gnu.org/copyleft/gpl.html
8# for details.8# for details.
99
10import codecs10import pickle
11import cPickle
12import json11import json
13import optparse12import optparse
14import os13import os
@@ -18,7 +17,8 @@ import sys
18def save_database_json(database, db_filename):17def save_database_json(database, db_filename):
19 '''Save usn database'''18 '''Save usn database'''
20 filename = os.path.expanduser(db_filename)19 filename = os.path.expanduser(db_filename)
21 json.dump(database, open(filename, 'w'), -1, encoding="utf-8")20 with open(filename, 'w') as f:
21 json.dump(database, f)
2222
23def convert_pickle_to_json(indb, outdb, prefix=None):23def convert_pickle_to_json(indb, outdb, prefix=None):
24 '''Convert a pickle database into a json'''24 '''Convert a pickle database into a json'''
@@ -26,31 +26,18 @@ def convert_pickle_to_json(indb, outdb, prefix=None):
26 if not os.path.isfile(filename):26 if not os.path.isfile(filename):
27 return False27 return False
2828
29 print >>sys.stderr, "INFO: Loading %s..." % (indb)29 print(f"INFO: Loading {indb}...", file=sys.stderr)
30 db = cPickle.load(open(indb))30 with open(indb, 'rb') as f:
31 db = pickle.load(f, encoding="utf-8")
3132
32 # Older python pickle's have a bug that stores utf-8 data incorrectly.33 if prefix:
33 # Account for that in our top level keys (db[k]['description'] is known to34 for k in db.keys():
34 # have this)35 for j in db[k].keys():
35 new_db = dict()36 if j == 'id':
36 count = 037 db[k][j] = f"{prefix}{db[k][j]}"
37 for k in db.keys():
38 new_db[k] = dict()
39 for j in db[k].keys():
40 if prefix and j == 'id':
41 db[k][j] = "{}{}".format(prefix, db[k][j])
42 try:
43 json.dumps(db[k][j]) # if this fails, so will json.dump later
44 new_db[k][j] = db[k][j]
45 except:
46 count += 1
47 new_db[k][j] = db[k][j].decode("utf-8", "replace")
4838
49 if count > 0:39 print(f"INFO: Saving {outdb}...", file=sys.stderr)
50 print >>sys.stderr, "WARN: performed %d pickle decode conversions" % count40 save_database_json(db, outdb)
51
52 print >>sys.stderr, "INFO: Saving %s..." % (outdb)
53 save_database_json(new_db, outdb)
5441
55#42#
56# main43# main
@@ -64,16 +51,16 @@ if __name__ == "__main__":
64 (opt, args) = parser.parse_args()51 (opt, args) = parser.parse_args()
6552
66 if not opt.infile:53 if not opt.infile:
67 print >>sys.stderr, "Must specify --input-file"54 print("Must specify --input-file", file=sys.stderr)
68 sys.exit(1)55 sys.exit(1)
69 elif not opt.outfile:56 elif not opt.outfile:
70 print >>sys.stderr, "Must specify --output-file"57 print("Must specify --output-file", file=sys.stderr)
71 sys.exit(1)58 sys.exit(1)
72 elif not os.path.isfile(opt.infile):59 elif not os.path.isfile(opt.infile):
73 print >>sys.stderr, "'%s' does not exist" % (opt.infile)60 print(f"'{opt.infile}' does not exist", file=sys.stderr)
74 sys.exit(1)61 sys.exit(1)
75 elif os.path.exists(opt.outfile):62 elif os.path.exists(opt.outfile):
76 print >>sys.stderr, "'%s' already exists" % (opt.outfile)63 print(f"'{opt.outfile}' already exists", file=sys.stderr)
77 sys.exit(1)64 sys.exit(1)
7865
79 convert_pickle_to_json(opt.infile, opt.outfile, opt.prefix)66 convert_pickle_to_json(opt.infile, opt.outfile, opt.prefix)
diff --git a/scripts/test_convert_pickle.py b/scripts/test_convert_pickle.py
80new file mode 10075567new file mode 100755
index 0000000..0fae6d9
--- /dev/null
+++ b/scripts/test_convert_pickle.py
@@ -0,0 +1,120 @@
1#!/usr/bin/env pytest-3
2# -*- coding: utf-8 -*-
3#
4# Author: Marc Deslauriers <marc.deslauriers@canonical.com>
5# Copyright (C) 2024 Canonical Ltd.
6#
7# This script is distributed under the terms and conditions of the GNU General
8# Public License, Version 3 or later. See http://www.gnu.org/copyleft/gpl.html
9# for details.
10#
11# Simple tests for convert-pickle.py
12#
13# The test pickle files contain USN-6686-1. This USN was selected randomly
14# because it was big and contained unicode characters.
15
16import pytest
17import subprocess
18import pickle
19import json
20import tempfile
21import os
22import unittest
23import shutil
24
25
26class TestConvertPickle(unittest.TestCase):
27 '''This tests the convert-pickle.py utility.'''
28
29 def setUp(self):
30 """Set up prior to each test_* function"""
31 self.tmpdir = tempfile.mkdtemp(prefix='test_convert_pickle-')
32
33 def tearDown(self):
34 """Clean up after each test_* function"""
35 if os.path.exists(self.tmpdir):
36 shutil.rmtree(self.tmpdir)
37
38 def _load_pickle(self, filename):
39 '''loads a pickle file'''
40 with open(filename, 'rb') as f:
41 db = pickle.load(f, encoding='utf-8')
42 return db
43
44 def _load_json(self, filename):
45 '''loads a json file'''
46 with open(filename, 'rb') as f:
47 db = json.load(f)
48 return db
49
50 def _load_text(self, filename):
51 '''loads a file as text'''
52 with open(filename, 'rb') as f:
53 text = f.read()
54 return text
55
56 def test_convert_pickle_v2(self):
57 '''test converting pickle v2 file to json'''
58 pickle_file = 'scripts/testfiles/database-protv2.pickle'
59 json_file = os.path.join(self.tmpdir, 'test.json')
60
61 rc = subprocess.call(["scripts/convert-pickle.py",
62 "-i", pickle_file,
63 "-o", json_file])
64 self.assertEqual(rc, 0)
65
66 pickle_db = self._load_pickle(pickle_file)
67 json_db = self._load_json(json_file)
68 json_text = self._load_text(json_file)
69
70 # Make sure the databases are the same
71 self.assertEqual(pickle_db, json_db)
72
73 # Make sure unicode is escaped in json file
74 self.assertTrue('黄' in json_db['6686-1']['description'])
75 self.assertFalse('黄' in json_text.decode('utf-8'))
76 self.assertTrue(b'\\u9ec4' in json_text)
77
78 # Make sure the id has no prefix
79 self.assertEqual(json_db['6686-1']['id'], '6686-1')
80
81 def test_convert_pickle_prefix(self):
82 '''test converting pickle file to json with prefix'''
83 pickle_file = 'scripts/testfiles/database-protv2.pickle'
84 json_file = os.path.join(self.tmpdir, 'test.json')
85
86 rc = subprocess.call(["scripts/convert-pickle.py",
87 "-i", pickle_file,
88 "-o", json_file,
89 "-p", "TEST-"])
90 self.assertEqual(rc, 0)
91
92 json_db = self._load_json(json_file)
93
94 self.assertEqual(json_db['6686-1']['id'], 'TEST-6686-1')
95
96 def test_convert_pickle_v3(self):
97 '''test converting pickle v3 file to json'''
98 pickle_file = 'scripts/testfiles/database-protv3.pickle'
99 json_file = os.path.join(self.tmpdir, 'test.json')
100
101 rc = subprocess.call(["scripts/convert-pickle.py",
102 "-i", pickle_file,
103 "-o", json_file])
104 self.assertEqual(rc, 0)
105
106 pickle_db = self._load_pickle(pickle_file)
107 json_db = self._load_json(json_file)
108 json_text = self._load_text(json_file)
109
110 # Make sure the databases are the same
111 self.assertEqual(pickle_db, json_db)
112
113 # Make sure unicode is escaped in json file
114 self.assertTrue('黄' in json_db['6686-1']['description'])
115 self.assertFalse('黄' in json_text.decode('utf-8'))
116 self.assertTrue(b'\\u9ec4' in json_text)
117
118 # Make sure the id has no prefix
119 self.assertEqual(json_db['6686-1']['id'], '6686-1')
120
diff --git a/scripts/testfiles/database-protv2.pickle b/scripts/testfiles/database-protv2.pickle
0new file mode 100644121new file mode 100644
index 0000000..285e2ef
1Binary files /dev/null and b/scripts/testfiles/database-protv2.pickle differ122Binary files /dev/null and b/scripts/testfiles/database-protv2.pickle differ
diff --git a/scripts/testfiles/database-protv3.pickle b/scripts/testfiles/database-protv3.pickle
2new file mode 100644123new file mode 100644
index 0000000..2c94bcd
3Binary files /dev/null and b/scripts/testfiles/database-protv3.pickle differ124Binary files /dev/null and b/scripts/testfiles/database-protv3.pickle differ

Subscribers

People subscribed via source and target branches