Merge lp:~brian-murray/daisy/src-version-buckets into lp:daisy

Proposed by Brian Murray
Status: Merged
Merged at revision: 283
Proposed branch: lp:~brian-murray/daisy/src-version-buckets
Merge into: lp:daisy
Diff against target: 68 lines (+46/-0)
2 files modified
daisy/utils.py (+4/-0)
tools/build_src_version_buckets.py (+42/-0)
To merge this branch: bzr merge lp:~brian-murray/daisy/src-version-buckets
Reviewer Review Type Date Requested Status
Daisy Pluckers Pending
Review via email: mp+155631@code.launchpad.net

Description of the change

This branch will start recording to SourceVersionBuckets during the bucketing of Oopses. Additionally, there is a tool to populate SourceVersionBuckets with data from the OOPS column family.

To post a comment you must log in.
Revision history for this message
Evan (ev) wrote :

Mostly looks good. Feel free to merge once you've addressed the points
below.

On Tue, Mar 26, 2013 at 9:57 PM, Brian Murray <email address hidden> wrote:
>
> + oopses.update_source_version_buckets(oops_config, src_package,
> + version, crash_signature)
>

Can you wrap this in an `if hasattr(oopses, 'update_source_version_buckets')`
check, so that we don't crash when processing new crashes while we're updating
oops-repository and daisy on production?

> if version:
> oopses.update_bucket_versions(oops_config, crash_signature,
> version)
>
>
> +for key, oops in oops_cf.get_range(columns=cols):
> + count += 1
> + if count % 10000 == 0:
> + break
>

I suspect you had this in for debugging?

> + release = oops['DistroRelease'].encode('utf8')
>

Use oops.get here. You're not guaranteed to get rows with all the column
names you specified.

> +
> + if not release.startswith('Ubuntu '):
> + continue
> + package_data = oops['Package'].split(' ')

> + if len(package_data) < 2:
> + continue
> + version = package_data[1]
>

Please use daisy.utils.split_package_and_version instead, as it catches
some corner cases (hopefully).

> + src_package = oops['SourcePackage']
>

Use oops.get here as well.

> + srcversbucketsinsert((src_package, version), {oops_id : ''})
>

Typo :) (missing the dot: this should be srcversbuckets.insert)

280. By Brian Murray

changes based on evan's feedback

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'daisy/utils.py'
--- daisy/utils.py 2013-03-26 23:58:48 +0000
+++ daisy/utils.py 2013-03-27 17:13:24 +0000
@@ -38,6 +38,7 @@
38def bucket(oops_config, oops_id, crash_signature, report_dict):38def bucket(oops_config, oops_id, crash_signature, report_dict):
39 release = report_dict.get('DistroRelease', '')39 release = report_dict.get('DistroRelease', '')
40 package = report_dict.get('Package', '')40 package = report_dict.get('Package', '')
41 src_package = report_dict.get('SourcePackage', '')
41 problem_type = report_dict.get('ProblemType', '')42 problem_type = report_dict.get('ProblemType', '')
42 dependencies = report_dict.get('Dependencies', '')43 dependencies = report_dict.get('Dependencies', '')
43 system_uuid = report_dict.get('SystemIdentifier', '')44 system_uuid = report_dict.get('SystemIdentifier', '')
@@ -78,6 +79,9 @@
78 oopses.update_bucket_metadata(oops_config, crash_signature, package,79 oopses.update_bucket_metadata(oops_config, crash_signature, package,
79 version, apt.apt_pkg.version_compare,80 version, apt.apt_pkg.version_compare,
80 release)81 release)
82 if hasattr(oopses, 'update_source_version_buckets'):
83 oopses.update_source_version_buckets(oops_config, src_package,
84 version, crash_signature)
81 if version:85 if version:
82 oopses.update_bucket_versions(oops_config, crash_signature, version)86 oopses.update_bucket_versions(oops_config, crash_signature, version)
8387
8488
=== added file 'tools/build_src_version_buckets.py'
--- tools/build_src_version_buckets.py 1970-01-01 00:00:00 +0000
+++ tools/build_src_version_buckets.py 2013-03-27 17:13:24 +0000
@@ -0,0 +1,42 @@
1#!/usr/bin/python
2
3import pycassa
4import uuid
5from daisy import config
6from utils import split_package_and_version
7from collections import Counter
8
9creds = {'username': config.cassandra_username,
10 'password': config.cassandra_password}
11pool = pycassa.ConnectionPool(config.cassandra_keyspace,
12 config.cassandra_hosts, timeout=600,
13 max_retries=100, credentials=creds)
14
15oops_cf = pycassa.ColumnFamily(pool, 'OOPS')
16srcversbuckets = pycassa.ColumnFamily(pool, 'SourceVersionBuckets')
17
18cols = ['SourcePackage', 'Package', 'DistroRelease']
19count = 0
20for key, oops in oops_cf.get_range(columns=cols):
21 count += 1
22 if count % 100000 == 0:
23 print 'processed', count
24
25 if Counter(cols) != Counter(oops.keys()):
26 continue
27
28 release = oops.get('DistroRelease', '')
29 if not release.startswith('Ubuntu ') or release == '':
30 continue
31 package = oops.get('Package', '')
32 if package:
33 package, version = split_package_and_version(package)
34 src_package = oops.get('SourcePackage', '')
35 if src_package == '' or version == '':
36 continue
37 oops_id = uuid.UUID(key)
38 #print('Would insert (%s, %s) = {%s, ""}' % (src_package, version,
39 # oops_id))
40 srcversbuckets.insert((src_package, version), {oops_id : ''})
41
42print 'total processed', count

Subscribers

People subscribed via source and target branches

to all changes: