Merge lp:~ev/oops-repository/whoopsie-daisy into lp:oops-repository

Proposed by Evan
Status: Merged
Approved by: j.c.sackett
Approved revision: 28
Merged at revision: 13
Proposed branch: lp:~ev/oops-repository/whoopsie-daisy
Merge into: lp:oops-repository
Diff against target: 454 lines (+94/-114)
8 files modified
oopsrepository/cassandra.py (+1/-1)
oopsrepository/oopses.py (+0/-94)
oopsrepository/schema.py (+17/-1)
oopsrepository/testing/cassandra.py (+1/-1)
oopsrepository/testing/matchers.py (+11/-2)
oopsrepository/tests/test_matchers.py (+1/-1)
oopsrepository/tests/test_oopses.py (+62/-13)
oopsrepository/tests/test_schema.py (+1/-1)
To merge this branch: bzr merge lp:~ev/oops-repository/whoopsie-daisy
Reviewer Review Type Date Requested Status
j.c.sackett (community) Approve
Robert Collins Pending
Review via email: mp+97714@code.launchpad.net

Description of the change

Apologies for the accidental deletion there.

This branch:
 - Brings oops-repository up to the latest pycassa API.
 - Replaces the insert_bson function with insert_dict, as we want to inspect the contents of the OOPS in the WSGI code to make a determination as to how we're going to process it, and it doesn't make sense to parse the BSON string twice.
 - Adds a bucket method on the oops module to store crashes that are of the same issue in a single collection.
 - Counts the additions to these buckets at a day interval.
 - Adds tests.

To post a comment you must log in.
Revision history for this message
j.c.sackett (jcsackett) wrote :

This largely looks okay to me, but I'm holding off on voting b/c I'm not certain what has changed in oopses.py; I can see the LoC count is different, and I think everything looks alright, but could you post a diff here of just that file against the current trunk?

Or, if someone with greater knowledge of the oops-repository code thinks this is good, that person is welcome to just mark approved and run with it.

review: Needs Information
Revision history for this message
Evan (ev) wrote :
Revision history for this message
j.c.sackett (jcsackett) wrote :

Thanks so much.

This looks good to me.

review: Approve
27. By Evan on 2012-03-23

Handle unicode data in OOPSes.

28. By Evan on 2012-03-23

Missed that comment in DESIGN.txt about all the columns being strings. Simplify the previous commit by just setting a default_validation_class.

Revision history for this message
Evan (ev) wrote :

I've added a commit to handle unicode data in OOPSes by setting a default_validation_class of utf8. The previous behavior was causing exceptions as submissions were made to Ubuntu's crash database:
https://pastebin.canonical.com/62948/

I originally implemented this by encoding unicode dictionary keys and values as utf8 in oops-repository, but then I noticed the comment in DESIGN.txt about the columns all being strings. If that interpretation is incorrect, do feel free to take r27 instead of r28.

Revision history for this message
Robert Collins (lifeless) wrote :

This is landable - we need to get you direct push rights, for now, perhaps jc can land it for you.

Revision history for this message
j.c.sackett (jcsackett) wrote :

I'll assume that counts as a vote for "Approve" on your pending, Robert, and I'm happy to land it.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'oopsrepository/cassandra.py'
2--- oopsrepository/cassandra.py 2011-02-26 19:47:58 +0000
3+++ oopsrepository/cassandra.py 2012-03-23 23:37:20 +0000
4@@ -6,7 +6,7 @@
5
6 """Things to ease working with cassandra."""
7
8-from pycassa.cassandra.Cassandra import InvalidRequestException
9+from pycassa.cassandra.ttypes import InvalidRequestException
10
11 def workaround_1779(callable, *args, **kwargs):
12 """Workaround cassandra not being able to do concurrent schema edits.
13
14=== added file 'oopsrepository/oopses.py'
15--- oopsrepository/oopses.py 1970-01-01 00:00:00 +0000
16+++ oopsrepository/oopses.py 2012-03-23 23:37:20 +0000
17@@ -0,0 +1,116 @@
18+# oops-repository is Copyright 2011 Canonical Ltd.
19+#
20+# Canonical Ltd ("Canonical") distributes the oops-repository source code under
21+# the GNU Affero General Public License, version 3 ("AGPLv3"). See the file
22+# LICENSE in the source tree for more information.
23+
24+"""basic operations on oopses in the db."""
25+
26+import json
27+import bson
28+import time
29+import uuid
30+
31+import pycassa
32+from pycassa.index import create_index_expression, LT, create_index_clause
33+from pycassa.cassandra.ttypes import IndexExpression
34+
35+DAY = 60*60*24
36+MONTH = DAY*30
37+
38+def prune(config):
39+ """Remove OOPSES that are over 30 days old."""
40+ pool = pycassa.pool.ConnectionPool(config['keyspace'], config['host'])
41+ dayoops_cf = pycassa.ColumnFamily(pool, 'DayOOPS')
42+ oops_cf = pycassa.ColumnFamily(pool, 'OOPS')
43+ # Find days to prune
44+ days = set()
45+ prune_to = time.strftime('%Y%m%d', time.gmtime(time.time() - MONTH))
46+ for key, _ in dayoops_cf.get_range():
47+ if key < prune_to:
48+ days.add(key)
49+ if not days:
50+ return
51+ # collect all the oopses (buffers all in memory; may want to make
52+ # incremental in future)
53+ batch_size = 10000
54+ oopses = []
55+ for day in days:
56+ columns_found = batch_size
57+ start_col = ''
58+ while columns_found==batch_size:
59+ columns = dayoops_cf.get(day, column_start=start_col)
60+ columns_found = len(columns)
61+ for column, oopsid in columns.items():
62+ start_col = column
63+ oopses.append(oopsid)
64+ # Remove the oopses
65+ batch = oops_cf.batch()
66+ map(batch.remove, oopses)
67+ batch.send()
68+ # Clean out the days aggregates
69+ # Clean out the days index
70+ batch = dayoops_cf.batch()
71+ map(batch.remove, days)
72+ batch.send()
73+
74+def insert(config, oopsid, oops_json, user_token=None):
75+ """Insert an OOPS into the system.
76+
77+ :return: The day which the oops was filed under.
78+ """
79+ # make sure the oops report is a json dict, and break out each key to a
80+ # separate column. For now, rather than worrying about typed column values
81+ # we just coerce them all to strings.
82+ oops_dict = json.loads(oops_json)
83+ assert isinstance(oops_dict, dict)
84+ insert_dict = {}
85+ for key, value in oops_dict.items():
86+ insert_dict[key] = json.dumps(value)
87+ return _insert(config, oopsid, insert_dict, user_token)
88+
89+def insert_dict(config, oopsid, oops_dict, user_token=None):
90+ """Insert an OOPS into the system.
91+
92+ :return: The day which the oops was filed under.
93+ """
94+ assert isinstance(oops_dict, dict)
95+ return _insert(config, oopsid, oops_dict, user_token)
96+
97+def _insert(config, oopsid, insert_dict, user_token=None):
98+ pool = pycassa.pool.ConnectionPool(config['keyspace'], config['host'])
99+ dayoops_cf = pycassa.ColumnFamily(pool, 'DayOOPS')
100+ oops_cf = pycassa.ColumnFamily(pool, 'OOPS')
101+ day_key = time.strftime('%Y%m%d', time.gmtime())
102+ now_uuid = uuid.uuid1()
103+
104+ oops_cf.insert(oopsid, insert_dict)
105+ dayoops_cf.insert(day_key, {now_uuid:oopsid})
106+ if user_token:
107+ useroops_cf = pycassa.ColumnFamily(pool, 'UserOOPS')
108+ useroops_cf.insert(user_token, {oopsid : ''})
109+
110+ return day_key
111+
112+def bucket(config, oopsid, bucketid):
113+ """Adds an OOPS to a bucket, a collection of OOPSes that form a single
114+ issue. If the bucket does not exist, it will be created.
115+
116+ :return: The day which the bucket was filed under.
117+ """
118+ pool = pycassa.pool.ConnectionPool(config['keyspace'], config['host'])
119+ bucket_cf = pycassa.ColumnFamily(pool, 'Buckets')
120+ daybucket_cf = pycassa.ColumnFamily(pool, 'DayBuckets')
121+ daybucketcount_cf = pycassa.ColumnFamily(pool, 'DayBucketsCount')
122+ day_key = time.strftime('%Y%m%d', time.gmtime())
123+
124+ bucket_cf.insert(bucketid, {oopsid : ''})
125+ daybucket_cf.insert((day_key, bucketid), {oopsid : ''})
126+ # We have no way of knowing whether an increment has been performed if the
127+ # write fails unexpectedly (CASSANDRA-2495). We will apply eventual
128+ # consistency to this problem and tolerate slightly inaccurate counts for
129+ # the span of a single day, cleaning up once this period has passed. This
130+ # will be done by counting the number of columns in DayBuckets for the day
131+ # and bucket ID.
132+ daybucketcount_cf.add(day_key, bucketid)
133+ return day_key
134
135=== removed file 'oopsrepository/oopses.py'
136--- oopsrepository/oopses.py 2012-02-21 10:56:50 +0000
137+++ oopsrepository/oopses.py 1970-01-01 00:00:00 +0000
138@@ -1,94 +0,0 @@
139-# oops-repository is Copyright 2011 Canonical Ltd.
140-#
141-# Canonical Ltd ("Canonical") distributes the oops-repository source code under
142-# the GNU Affero General Public License, version 3 ("AGPLv3"). See the file
143-# LICENSE in the source tree for more information.
144-
145-"""basic operations on oopses in the db."""
146-
147-import json
148-import bson
149-import time
150-import uuid
151-
152-import pycassa
153-from pycassa.index import create_index_expression, LT, create_index_clause
154-from pycassa.cassandra.ttypes import IndexExpression
155-
156-DAY = 60*60*24
157-MONTH = DAY*30
158-
159-def prune(config):
160- """Remove OOPSES that are over 30 days old."""
161- pool = pycassa.connect(config['keyspace'], config['host'])
162- dayoops_cf = pycassa.ColumnFamily(pool, 'DayOOPS')
163- oops_cf = pycassa.ColumnFamily(pool, 'OOPS')
164- # Find days to prune
165- days = set()
166- prune_to = time.strftime('%Y%m%d', time.gmtime(time.time() - MONTH))
167- for key, _ in dayoops_cf.get_range(columns=()):
168- if key < prune_to:
169- days.add(key)
170- if not days:
171- return
172- # collect all the oopses (buffers all in memory; may want to make
173- # incremental in future)
174- batch_size = 10000
175- oopses = []
176- for day in days:
177- columns_found = batch_size
178- start_col = ''
179- while columns_found==batch_size:
180- columns = dayoops_cf.get(day, column_start=start_col)
181- columns_found = len(columns)
182- for column, oopsid in columns.items():
183- start_col = column
184- oopses.append(oopsid)
185- # Remove the oopses
186- batch = oops_cf.batch()
187- map(batch.remove, oopses)
188- batch.send()
189- # Clean out the days aggregates
190- # Clean out the days index
191- batch = dayoops_cf.batch()
192- map(batch.remove, days)
193- batch.send()
194-
195-def insert(config, oopsid, oops_json, user_token=None):
196- """Insert an OOPS into the system.
197-
198- :return: The day which the oops was filed under.
199- """
200- # make sure the oops report is a json dict, and break out each key to a
201- # separate column. For now, rather than worrying about typed column values
202- # we just coerce them all to strings.
203- oops_dict = json.loads(oops_json)
204- assert isinstance(oops_dict, dict)
205- insert_dict = {}
206- for key, value in oops_dict.items():
207- insert_dict[key] = json.dumps(value)
208- return _insert(config, oopsid, insert_dict, user_token)
209-
210-def insert_bson(config, oopsid, oops_bson, user_token=None):
211- """Insert an OOPS into the system.
212-
213- :return: The day which the oops was filed under.
214- """
215- oops_dict = bson.BSON(oops_bson)
216- insert_dict = oops_dict.to_dict()
217- return _insert(config, oopsid, insert_dict, user_token)
218-
219-def _insert(config, oopsid, insert_dict, user_token=None):
220- pool = pycassa.connect(config['keyspace'], config['host'])
221- dayoops_cf = pycassa.ColumnFamily(pool, 'DayOOPS')
222- oops_cf = pycassa.ColumnFamily(pool, 'OOPS')
223- day_key = time.strftime('%Y%m%d', time.gmtime())
224- now_uuid = uuid.uuid1()
225-
226- oops_cf.insert(oopsid, insert_dict)
227- dayoops_cf.insert(day_key, {now_uuid:oopsid})
228- if user_token:
229- useroops_cf = pycassa.ColumnFamily(pool, 'UserOOPS')
230- useroops_cf.insert(user_token, {oopsid : ''})
231-
232- return day_key
233
234=== modified file 'oopsrepository/schema.py'
235--- oopsrepository/schema.py 2011-04-03 11:14:12 +0000
236+++ oopsrepository/schema.py 2012-03-23 23:37:20 +0000
237@@ -6,6 +6,11 @@
238
239 """The schema for oopsrepository."""
240
241+from pycassa.types import (
242+ CompositeType,
243+ UTF8Type,
244+ CounterColumnType,
245+ )
246 from pycassa.system_manager import (
247 LONG_TYPE,
248 SystemManager,
249@@ -28,9 +33,20 @@
250 mgr = SystemManager()
251 try:
252 workaround_1779(mgr.create_column_family, keyspace, 'OOPS',
253- comparator_type=UTF8_TYPE)
254+ comparator_type=UTF8_TYPE, default_validation_class=UTF8_TYPE)
255 workaround_1779(mgr.create_column_family, keyspace, 'DayOOPS',
256 comparator_type=TIME_UUID_TYPE)
257+ workaround_1779(mgr.create_column_family, keyspace, 'UserOOPS',
258+ comparator_type=UTF8_TYPE)
259+ workaround_1779(mgr.create_column_family, keyspace, 'Buckets',
260+ comparator_type=UTF8_TYPE)
261+ # TODO It might be more performant to use just the date for the key and
262+ # a composite key of the bucket_id and the oops_id as the column name.
263+ composite = CompositeType(UTF8Type(), UTF8Type())
264+ workaround_1779(mgr.create_column_family, keyspace, 'DayBuckets',
265+ comparator_type=UTF8_TYPE, key_validation_class=composite)
266+ workaround_1779(mgr.create_column_family, keyspace, 'DayBucketsCount',
267+ comparator_type=UTF8_TYPE, default_validation_class=CounterColumnType())
268 finally:
269 mgr.close()
270
271
272=== modified file 'oopsrepository/testing/cassandra.py'
273--- oopsrepository/testing/cassandra.py 2011-02-27 04:46:26 +0000
274+++ oopsrepository/testing/cassandra.py 2012-03-23 23:37:20 +0000
275@@ -28,7 +28,7 @@
276 self.keyspace = os.path.basename(tempdir.path)
277 self.mgr = SystemManager()
278 workaround_1779(self.mgr.create_keyspace, self.keyspace,
279- replication_factor=1)
280+ pycassa.SIMPLE_STRATEGY, {'replication_factor' : '1'})
281 self.addCleanup(workaround_1779, self.mgr.drop_keyspace,
282 self.keyspace)
283
284
285=== modified file 'oopsrepository/testing/matchers.py'
286--- oopsrepository/testing/matchers.py 2011-04-03 11:14:12 +0000
287+++ oopsrepository/testing/matchers.py 2012-03-23 23:37:20 +0000
288@@ -11,7 +11,7 @@
289 import uuid
290
291 import pycassa
292-from pycassa.cassandra.Cassandra import NotFoundException
293+from pycassa.cassandra.ttypes import NotFoundException
294 from testtools.matchers import Matcher, Mismatch
295
296
297@@ -22,12 +22,21 @@
298 """
299
300 def match(self, keyspace):
301- pool = pycassa.connect(keyspace)
302+ pool = pycassa.ConnectionPool(keyspace, ['localhost:9160'])
303 try:
304 cf = pycassa.ColumnFamily(pool, 'OOPS')
305 cf.insert('key',
306 {"date":json.dumps(time.time()), "URL":'a bit boring'})
307 cf = pycassa.ColumnFamily(pool, 'DayOOPS')
308 cf.insert('20100212', {uuid.uuid1(): 'key'})
309+ cf = pycassa.ColumnFamily(pool, 'UserOOPS')
310+ cf.insert('user-token', {'key':''})
311+
312+ cf = pycassa.ColumnFamily(pool, 'Buckets')
313+ cf.insert('/bin/bash:11:x86_64:[vdso]+70c:...', {'key':''})
314+ cf = pycassa.ColumnFamily(pool, 'DayBuckets')
315+ cf.insert(('20100212', '/bin/bash:11:x86_64:[vdso]+70c:...'), {'key':''})
316+ cf = pycassa.ColumnFamily(pool, 'DayBucketsCount')
317+ cf.add('20100212', '/bin/bash:11:x86_64:[vdso]+70c:...', 13)
318 except NotFoundException as e:
319 return Mismatch(e.why)
320
321=== modified file 'oopsrepository/tests/test_matchers.py'
322--- oopsrepository/tests/test_matchers.py 2011-02-27 04:46:26 +0000
323+++ oopsrepository/tests/test_matchers.py 2012-03-23 23:37:20 +0000
324@@ -16,6 +16,6 @@
325 def test_creates_columnfamily(self):
326 keyspace = self.useFixture(TemporaryKeyspace()).keyspace
327 self.assertNotEqual(None, HasOOPSSchema().match(keyspace))
328- config = dict(keyspace=keyspace)
329+ config = dict(keyspace=keyspace, host=['localhost:9160'])
330 schema.create(config)
331 self.assertThat(keyspace, HasOOPSSchema())
332
333=== modified file 'oopsrepository/tests/test_oopses.py'
334--- oopsrepository/tests/test_oopses.py 2011-04-03 11:14:12 +0000
335+++ oopsrepository/tests/test_oopses.py 2012-03-23 23:37:20 +0000
336@@ -1,3 +1,4 @@
337+# -*- coding: utf-8; -*-
338 # oops-repository is Copyright 2011 Canonical Ltd.
339 #
340 # Canonical Ltd ("Canonical") distributes the oops-repository source code under
341@@ -19,10 +20,10 @@
342
343 def test_fresh_oops_kept(self):
344 keyspace = self.useFixture(TemporaryOOPSDB()).keyspace
345- config = dict(keyspace=keyspace)
346+ config = dict(keyspace=keyspace, host=['localhost:9160'])
347 day_key = oopses.insert(config, 'key',
348 json.dumps({"date":time.time(), "URL":'a bit boring'}))
349- pool = pycassa.connect(keyspace)
350+ pool = pycassa.ConnectionPool(keyspace, ['localhost:9160'])
351 dayoopses_cf = pycassa.ColumnFamily(pool, 'DayOOPS')
352 oopses_cf = pycassa.ColumnFamily(pool, 'OOPS')
353 oopses.prune(config)
354@@ -32,8 +33,8 @@
355
356 def test_old_oops_deleted(self):
357 keyspace = self.useFixture(TemporaryOOPSDB()).keyspace
358- config = dict(keyspace=keyspace)
359- pool = pycassa.connect(keyspace)
360+ config = dict(keyspace=keyspace, host=['localhost:9160'])
361+ pool = pycassa.ConnectionPool(keyspace, ['localhost:9160'])
362 dayoopses_cf = pycassa.ColumnFamily(pool, 'DayOOPS')
363 datestamp = time.time() - oopses.MONTH - oopses.DAY
364 day_key = time.strftime('%Y%m%d', time.gmtime(datestamp))
365@@ -50,20 +51,68 @@
366
367 class TestInsert(TestCase):
368
369- def test_insert_oops(self):
370- keyspace = self.useFixture(TemporaryOOPSDB()).keyspace
371- oopsid = 'booyah'
372- oops = json.dumps({'duration': 13000})
373- config = dict(keyspace=keyspace)
374- day_key = oopses.insert(config, oopsid, oops)
375- pool = pycassa.connect(keyspace)
376+ def _test_insert_check(self, keyspace, oopsid, day_key, value=None):
377+ pool = pycassa.ConnectionPool(keyspace, host=['localhost:9160'])
378 oopses_cf = pycassa.ColumnFamily(pool, 'OOPS')
379+ if value is None:
380+ value = '13000'
381 # The oops is retrievable
382 columns = oopses_cf.get(oopsid)
383- self.assertEqual('13000', columns['duration'])
384+ self.assertEqual(value, columns['duration'])
385 # The oops has been indexed by day
386- pool = pycassa.connect(keyspace)
387+ pool = pycassa.ConnectionPool(keyspace, host=['localhost:9160'])
388 dayoops_cf = pycassa.ColumnFamily(pool, 'DayOOPS')
389 oops_refs = dayoops_cf.get(day_key)
390 self.assertEqual([oopsid], oops_refs.values())
391 ## TODO - the aggregates for the OOPS have been updated.
392+
393+ def test_insert_oops(self):
394+ keyspace = self.useFixture(TemporaryOOPSDB()).keyspace
395+ oopsid = 'booyah'
396+ oops = json.dumps({'duration': 13000})
397+ config = dict(keyspace=keyspace, host=['localhost:9160'])
398+ day_key = oopses.insert(config, oopsid, oops)
399+ self._test_insert_check(keyspace, oopsid, day_key)
400+
401+ def test_insert_oops_dict(self):
402+ keyspace = self.useFixture(TemporaryOOPSDB()).keyspace
403+ oopsid = 'booyah'
404+ oops = {'duration': '13000'}
405+ config = dict(keyspace=keyspace, host=['localhost:9160'])
406+ day_key = oopses.insert_dict(config, oopsid, oops)
407+ self._test_insert_check(keyspace, oopsid, day_key)
408+
409+ def test_insert_unicode(self):
410+ keyspace = self.useFixture(TemporaryOOPSDB()).keyspace
411+ oopsid = 'booyah'
412+ oops = {'duration': u'♥'}
413+ config = dict(keyspace=keyspace, host=['localhost:9160'])
414+ day_key = oopses.insert_dict(config, oopsid, oops)
415+ self._test_insert_check(keyspace, oopsid, day_key, value=u'♥')
416+
417+class TestBucket(TestCase):
418+
419+ def test_insert_bucket(self):
420+ keyspace = self.useFixture(TemporaryOOPSDB()).keyspace
421+ config = dict(keyspace=keyspace, host=['localhost:9160'])
422+ oopsid = 'booyah'
423+ oops = json.dumps({'duration': 13000})
424+ oopses.insert(config, oopsid, oops)
425+ day_key = oopses.bucket(config, oopsid, 'bucket-key')
426+
427+ pool = pycassa.ConnectionPool(keyspace, ['localhost:9160'])
428+ bucket_cf = pycassa.ColumnFamily(pool, 'Buckets')
429+ daybucketcount_cf = pycassa.ColumnFamily(pool, 'DayBucketsCount')
430+
431+ oops_refs = bucket_cf.get('bucket-key')
432+ self.assertEqual([oopsid], oops_refs.keys())
433+ self.assertEqual(
434+ daybucketcount_cf.get(day_key, ['bucket-key']).values(), [1])
435+
436+ oopsid = 'foobar'
437+ oops = json.dumps({'wibbles': 12})
438+ oopses.insert(config, oopsid, oops)
439+ day_key = oopses.bucket(config, oopsid, 'bucket-key')
440+ self.assertEqual(
441+ daybucketcount_cf.get(day_key, ['bucket-key']).values(), [2])
442+
443
444=== modified file 'oopsrepository/tests/test_schema.py'
445--- oopsrepository/tests/test_schema.py 2011-02-27 04:46:26 +0000
446+++ oopsrepository/tests/test_schema.py 2012-03-23 23:37:20 +0000
447@@ -16,6 +16,6 @@
448
449 def test_creates_columnfamily(self):
450 keyspace = self.useFixture(TemporaryKeyspace()).keyspace
451- config = dict(keyspace=keyspace)
452+ config = dict(keyspace=keyspace, host=['localhost:9160'])
453 schema.create(config)
454 self.assertThat(keyspace, HasOOPSSchema())

Subscribers

People subscribed via source and target branches

to all changes: