Merge ~mdeslaur/ubuntu-cve-tracker:convert-pickle-py3 into ubuntu-cve-tracker:master

Proposed by Marc Deslauriers
Status: Merged
Merge reported by: Marc Deslauriers
Merged at revision: 8a188cdb6b6d66abadbe316f0b77a658010d6c02
Proposed branch: ~mdeslaur/ubuntu-cve-tracker:convert-pickle-py3
Merge into: ubuntu-cve-tracker:master
Diff against target: 229 lines (+139/-32)
2 files modified
scripts/convert-pickle.py (+19/-32)
scripts/test_convert_pickle.py (+120/-0)
Reviewer Review Type Date Requested Status
Alex Murray Approve
Review via email: mp+462233@code.launchpad.net

Commit message

commit 881703b3dc9ba3eabcab51fc6105c2d38933e55a
Author: Marc Deslauriers <email address hidden>
Date: Tue Mar 12 09:57:49 2024 -0400

    scripts/convert-pickle.py: convert to python 3

commit 41d0f307086483417748c0327a21a0dd80a9ef3e
Author: Marc Deslauriers <email address hidden>
Date: Tue Mar 12 09:52:48 2024 -0400

    scripts/test_convert_pickle.py: add test for convert-pickle.py

Description of the change

This merge proposal converts convert-pickle.py to Python 3 and gets rid of special handling for old Python 2 versions that slows down execution.

To post a comment you must log in.
8a188cd... by Marc Deslauriers

test_convert_pickle.py: clarify comment

Revision history for this message
Alex Murray (alexmurray) wrote :

LGTM - but one thing, can you please update .launchpad.yaml to include your new test script? Thanks.

review: Approve
Revision history for this message
Steve Beattie (sbeattie) wrote :

On Tue, Mar 12, 2024 at 11:42:21PM -0000, Alex Murray wrote:
> Review: Approve
>
> LGTM - but one thing, can you please update .launchpad.yaml to include your new test script? Thanks.

I'm planning to land my changes that adjust lpci / make check-python to
run pytest-3 against scripts/test_*.py so they will automatically get
picked up.

--
Steve Beattie
<email address hidden>

Revision history for this message
Marc Deslauriers (mdeslaur) wrote :

I've pushed this as-is so as not to conflict with Steve's pending merge, which will pick it up automatically.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1diff --git a/scripts/convert-pickle.py b/scripts/convert-pickle.py
2index 88258de..2eec39e 100755
3--- a/scripts/convert-pickle.py
4+++ b/scripts/convert-pickle.py
5@@ -1,14 +1,13 @@
6-#!/usr/bin/env python2
7+#!/usr/bin/env python3
8
9 # Author: Jamie Strandboge <jamie@ubuntu.com>
10-# Copyright (C) 2012 Canonical Ltd.
11+# Copyright (C) 2012-2024 Canonical Ltd.
12 #
13 # This script is distributed under the terms and conditions of the GNU General
14 # Public License, Version 2 or later. See http://www.gnu.org/copyleft/gpl.html
15 # for details.
16
17-import codecs
18-import cPickle
19+import pickle
20 import json
21 import optparse
22 import os
23@@ -18,7 +17,8 @@ import sys
24 def save_database_json(database, db_filename):
25 '''Save usn database'''
26 filename = os.path.expanduser(db_filename)
27- json.dump(database, open(filename, 'w'), -1, encoding="utf-8")
28+ with open(filename, 'w') as f:
29+ json.dump(database, f)
30
31 def convert_pickle_to_json(indb, outdb, prefix=None):
32 '''Convert a pickle database into a json'''
33@@ -26,31 +26,18 @@ def convert_pickle_to_json(indb, outdb, prefix=None):
34 if not os.path.isfile(filename):
35 return False
36
37- print >>sys.stderr, "INFO: Loading %s..." % (indb)
38- db = cPickle.load(open(indb))
39+ print(f"INFO: Loading {indb}...", file=sys.stderr)
40+ with open(indb, 'rb') as f:
41+ db = pickle.load(f, encoding="utf-8")
42
43- # Older python pickle's have a bug that stores utf-8 data incorrectly.
44- # Account for that in our top level keys (db[k]['description'] is known to
45- # have this)
46- new_db = dict()
47- count = 0
48- for k in db.keys():
49- new_db[k] = dict()
50- for j in db[k].keys():
51- if prefix and j == 'id':
52- db[k][j] = "{}{}".format(prefix, db[k][j])
53- try:
54- json.dumps(db[k][j]) # if this fails, so will json.dump later
55- new_db[k][j] = db[k][j]
56- except:
57- count += 1
58- new_db[k][j] = db[k][j].decode("utf-8", "replace")
59+ if prefix:
60+ for k in db.keys():
61+ for j in db[k].keys():
62+ if j == 'id':
63+ db[k][j] = f"{prefix}{db[k][j]}"
64
65- if count > 0:
66- print >>sys.stderr, "WARN: performed %d pickle decode conversions" % count
67-
68- print >>sys.stderr, "INFO: Saving %s..." % (outdb)
69- save_database_json(new_db, outdb)
70+ print(f"INFO: Saving {outdb}...", file=sys.stderr)
71+ save_database_json(db, outdb)
72
73 #
74 # main
75@@ -64,16 +51,16 @@ if __name__ == "__main__":
76 (opt, args) = parser.parse_args()
77
78 if not opt.infile:
79- print >>sys.stderr, "Must specify --input-file"
80+ print("Must specify --input-file", file=sys.stderr)
81 sys.exit(1)
82 elif not opt.outfile:
83- print >>sys.stderr, "Must specify --output-file"
84+ print("Must specify --output-file", file=sys.stderr)
85 sys.exit(1)
86 elif not os.path.isfile(opt.infile):
87- print >>sys.stderr, "'%s' does not exist" % (opt.infile)
88+ print(f"'{opt.infile}' does not exist", file=sys.stderr)
89 sys.exit(1)
90 elif os.path.exists(opt.outfile):
91- print >>sys.stderr, "'%s' already exists" % (opt.outfile)
92+ print(f"'{opt.outfile}' already exists", file=sys.stderr)
93 sys.exit(1)
94
95 convert_pickle_to_json(opt.infile, opt.outfile, opt.prefix)
96diff --git a/scripts/test_convert_pickle.py b/scripts/test_convert_pickle.py
97new file mode 100755
98index 0000000..0fae6d9
99--- /dev/null
100+++ b/scripts/test_convert_pickle.py
101@@ -0,0 +1,120 @@
102+#!/usr/bin/env pytest-3
103+# -*- coding: utf-8 -*-
104+#
105+# Author: Marc Deslauriers <marc.deslauriers@canonical.com>
106+# Copyright (C) 2024 Canonical Ltd.
107+#
108+# This script is distributed under the terms and conditions of the GNU General
109+# Public License, Version 3 or later. See http://www.gnu.org/copyleft/gpl.html
110+# for details.
111+#
112+# Simple tests for convert-pickle.py
113+#
114+# The test pickle files contain USN-6686-1. This USN was selected randomly
115+# because it was big and contained unicode characters.
116+
117+import pytest
118+import subprocess
119+import pickle
120+import json
121+import tempfile
122+import os
123+import unittest
124+import shutil
125+
126+
127+class TestConvertPickle(unittest.TestCase):
128+ '''This tests the convert-pickle.py utility.'''
129+
130+ def setUp(self):
131+ """Set up prior to each test_* function"""
132+ self.tmpdir = tempfile.mkdtemp(prefix='test_convert_pickle-')
133+
134+ def tearDown(self):
135+ """Clean up after each test_* function"""
136+ if os.path.exists(self.tmpdir):
137+ shutil.rmtree(self.tmpdir)
138+
139+ def _load_pickle(self, filename):
140+ '''loads a pickle file'''
141+ with open(filename, 'rb') as f:
142+ db = pickle.load(f, encoding='utf-8')
143+ return db
144+
145+ def _load_json(self, filename):
146+ '''loads a json file'''
147+ with open(filename, 'rb') as f:
148+ db = json.load(f)
149+ return db
150+
151+ def _load_text(self, filename):
152+ '''loads a file as text'''
153+ with open(filename, 'rb') as f:
154+ text = f.read()
155+ return text
156+
157+ def test_convert_pickle_v2(self):
158+ '''test converting pickle v2 file to json'''
159+ pickle_file = 'scripts/testfiles/database-protv2.pickle'
160+ json_file = os.path.join(self.tmpdir, 'test.json')
161+
162+ rc = subprocess.call(["scripts/convert-pickle.py",
163+ "-i", pickle_file,
164+ "-o", json_file])
165+ self.assertEqual(rc, 0)
166+
167+ pickle_db = self._load_pickle(pickle_file)
168+ json_db = self._load_json(json_file)
169+ json_text = self._load_text(json_file)
170+
171+ # Make sure the databases are the same
172+ self.assertEqual(pickle_db, json_db)
173+
174+ # Make sure unicode is escaped in json file
175+ self.assertTrue('黄' in json_db['6686-1']['description'])
176+ self.assertFalse('黄' in json_text.decode('utf-8'))
177+ self.assertTrue(b'\\u9ec4' in json_text)
178+
179+ # Make sure the id has no prefix
180+ self.assertEqual(json_db['6686-1']['id'], '6686-1')
181+
182+ def test_convert_pickle_prefix(self):
183+ '''test converting pickle file to json with prefix'''
184+ pickle_file = 'scripts/testfiles/database-protv2.pickle'
185+ json_file = os.path.join(self.tmpdir, 'test.json')
186+
187+ rc = subprocess.call(["scripts/convert-pickle.py",
188+ "-i", pickle_file,
189+ "-o", json_file,
190+ "-p", "TEST-"])
191+ self.assertEqual(rc, 0)
192+
193+ json_db = self._load_json(json_file)
194+
195+ self.assertEqual(json_db['6686-1']['id'], 'TEST-6686-1')
196+
197+ def test_convert_pickle_v3(self):
198+ '''test converting pickle v3 file to json'''
199+ pickle_file = 'scripts/testfiles/database-protv3.pickle'
200+ json_file = os.path.join(self.tmpdir, 'test.json')
201+
202+ rc = subprocess.call(["scripts/convert-pickle.py",
203+ "-i", pickle_file,
204+ "-o", json_file])
205+ self.assertEqual(rc, 0)
206+
207+ pickle_db = self._load_pickle(pickle_file)
208+ json_db = self._load_json(json_file)
209+ json_text = self._load_text(json_file)
210+
211+ # Make sure the databases are the same
212+ self.assertEqual(pickle_db, json_db)
213+
214+ # Make sure unicode is escaped in json file
215+ self.assertTrue('黄' in json_db['6686-1']['description'])
216+ self.assertFalse('黄' in json_text.decode('utf-8'))
217+ self.assertTrue(b'\\u9ec4' in json_text)
218+
219+ # Make sure the id has no prefix
220+ self.assertEqual(json_db['6686-1']['id'], '6686-1')
221+
222diff --git a/scripts/testfiles/database-protv2.pickle b/scripts/testfiles/database-protv2.pickle
223new file mode 100644
224index 0000000..285e2ef
225Binary files /dev/null and b/scripts/testfiles/database-protv2.pickle differ
226diff --git a/scripts/testfiles/database-protv3.pickle b/scripts/testfiles/database-protv3.pickle
227new file mode 100644
228index 0000000..2c94bcd
229Binary files /dev/null and b/scripts/testfiles/database-protv3.pickle differ

Subscribers

People subscribed via source and target branches