Merge lp:~matsubara/python-oops-datedir-repo/improve-db-statement-matches into lp:python-oops-datedir-repo

Proposed by Diogo Matsubara
Status: Merged
Approved by: Diogo Matsubara
Approved revision: 10
Merged at revision: 10
Proposed branch: lp:~matsubara/python-oops-datedir-repo/improve-db-statement-matches
Merge into: lp:python-oops-datedir-repo
Diff against target: 100 lines (+18/-19)
2 files modified
oops_datedir_repo/serializer_rfc822.py (+14/-17)
oops_datedir_repo/tests/test_serializer_rfc822.py (+4/-2)
To merge this branch: bzr merge lp:~matsubara/python-oops-datedir-repo/improve-db-statement-matches
Reviewer Review Type Date Requested Status
Abel Deuring (community) code Approve
Review via email: mp+73059@code.launchpad.net

Description of the change

This branch fixes a couple of small things:

- improves how request variables and db statements are identified by the parser. Before, it'd allow db statements to be read as if they were request variables. The check for request variables is now a bit more precise.

- Duration got from oops report file is now parsed as a float rather than as an int.

- The first traceback line is preserved avoiding a traceback without a newline character in its first line.

To post a comment you must log in.
Revision history for this message
Abel Deuring (adeuring) wrote :

Hi Diogo,

the branch looks good.

Just a minor suggestion:

> === modified file 'oops_datedir_repo/serializer_rfc822.py'
> --- oops_datedir_repo/serializer_rfc822.py 2011-08-25 20:53:24 +0000
> +++ oops_datedir_repo/serializer_rfc822.py 2011-08-26 14:17:14 +0000
> @@ -70,7 +70,7 @@
> exc_value = msg.getheader('exception-value')
> datestr = msg.getheader('date')
> if datestr is not None:
> - date =iso8601.parse_date(msg.getheader('date'))
> + date = iso8601.parse_date(msg.getheader('date'))
> else:
> date = None
> topic = msg.getheader('topic')
> @@ -79,9 +79,9 @@
> username = msg.getheader('user')
> url = msg.getheader('url')
> try:
> - duration = int(float(msg.getheader('duration', '-1')))
> + duration = float(msg.getheader('duration', '-1'))
> except ValueError:
> - duration = -1
> + duration = float(-1)
> informational = msg.getheader('informational')
> branch_nick = msg.getheader('branch')
> revno = msg.getheader('revision')
> @@ -94,28 +94,27 @@
> lines = iter(msg.fp)
>
> def is_req_var(line):
> - return "=" in line
> + return "=" in line and not is_statement(line)
>
> + statement_pat = re.compile(r'^(\d+)-(\d+)(?:@([\w-]+))?\s+(.*)')
> def is_statement(line):
> - return re.match(r'^(\d+)-(\d+)(?:@([\w-]+))?\s+(.*)', line)
> + return statement_pat.match(line)

is_statement() returns a match object or None, from statement_pat.match()...

>
> def is_traceback(line):
> - return 'traceback' in line.lower() or '== EXTRA DATA ==' in line
> + return line.lower().startswith('traceback') or line.startswith(
> + '== EXTRA DATA ==')
>
> req_vars = []
> statements = []
> first_tb_line = ''
> for line in lines:
> + first_tb_line = line
> line = line.strip()
> if line == '':
> continue
> else:
> - if is_req_var(line):
> - key, value = line.split('=', 1)
> - req_vars.append((urllib.unquote(key), urllib.unquote(value)))
> - elif is_statement(line):
> - match = re.match(
> - r'^(\d+)-(\d+)(?:@([\w-]+))?\s+(.*)', line)
> + if is_statement(line):
> + match = statement_pat.match(line)
> assert match is not None, (
> "Unable to interpret oops line: %s" % line)

... and here, statement_pat.match() is called again. I think
you don't need is_statement() at all: Something like

     match = statement_pat.match(line)
     if match is not None:
         start, end, db_id, statement = match.groups()

would be "good enough" :)

review: Approve (code)
11. By Diogo Matsubara

review comments

Revision history for this message
Diogo Matsubara (matsubara) wrote :

Thanks Abel, applied your suggestions!

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'oops_datedir_repo/serializer_rfc822.py'
2--- oops_datedir_repo/serializer_rfc822.py 2011-08-25 20:53:24 +0000
3+++ oops_datedir_repo/serializer_rfc822.py 2011-08-26 15:01:46 +0000
4@@ -70,7 +70,7 @@
5 exc_value = msg.getheader('exception-value')
6 datestr = msg.getheader('date')
7 if datestr is not None:
8- date =iso8601.parse_date(msg.getheader('date'))
9+ date = iso8601.parse_date(msg.getheader('date'))
10 else:
11 date = None
12 topic = msg.getheader('topic')
13@@ -79,9 +79,9 @@
14 username = msg.getheader('user')
15 url = msg.getheader('url')
16 try:
17- duration = int(float(msg.getheader('duration', '-1')))
18+ duration = float(msg.getheader('duration', '-1'))
19 except ValueError:
20- duration = -1
21+ duration = float(-1)
22 informational = msg.getheader('informational')
23 branch_nick = msg.getheader('branch')
24 revno = msg.getheader('revision')
25@@ -93,38 +93,35 @@
26 # support iteration.
27 lines = iter(msg.fp)
28
29+ statement_pat = re.compile(r'^(\d+)-(\d+)(?:@([\w-]+))?\s+(.*)')
30+
31 def is_req_var(line):
32- return "=" in line
33-
34- def is_statement(line):
35- return re.match(r'^(\d+)-(\d+)(?:@([\w-]+))?\s+(.*)', line)
36+ return "=" in line and not statement_pat.match(line)
37
38 def is_traceback(line):
39- return 'traceback' in line.lower() or '== EXTRA DATA ==' in line
40+ return line.lower().startswith('traceback') or line.startswith(
41+ '== EXTRA DATA ==')
42
43 req_vars = []
44 statements = []
45 first_tb_line = ''
46 for line in lines:
47+ first_tb_line = line
48 line = line.strip()
49 if line == '':
50 continue
51 else:
52- if is_req_var(line):
53- key, value = line.split('=', 1)
54- req_vars.append((urllib.unquote(key), urllib.unquote(value)))
55- elif is_statement(line):
56- match = re.match(
57- r'^(\d+)-(\d+)(?:@([\w-]+))?\s+(.*)', line)
58- assert match is not None, (
59- "Unable to interpret oops line: %s" % line)
60+ match = statement_pat.match(line)
61+ if match is not None:
62 start, end, db_id, statement = match.groups()
63 if db_id is not None:
64 db_id = intern(db_id) # This string is repeated lots.
65 statements.append(
66 (int(start), int(end), db_id, statement))
67+ elif is_req_var(line):
68+ key, value = line.split('=', 1)
69+ req_vars.append((urllib.unquote(key), urllib.unquote(value)))
70 elif is_traceback(line):
71- first_tb_line = line
72 break
73
74 # The rest is traceback.
75
76=== modified file 'oops_datedir_repo/tests/test_serializer_rfc822.py'
77--- oops_datedir_repo/tests/test_serializer_rfc822.py 2011-08-25 20:53:24 +0000
78+++ oops_datedir_repo/tests/test_serializer_rfc822.py 2011-08-26 15:01:46 +0000
79@@ -97,10 +97,11 @@
80 HTTP_REFERER=http://localhost:9000/
81 name%3Dfoo=hello%0Aworld
82
83- 00001-00005@store_a SELECT 1
84+ 00001-00005@store_a SELECT 1 = 2
85 00005-00010@store_b SELECT 2
86
87- traceback-text"""))
88+ traceback-text
89+ foo/bar"""))
90 report = read(fp)
91 self.assertEqual(report['id'], 'OOPS-A0001')
92 self.assertEqual(report['req_vars'][0], ('HTTP_USER_AGENT',
93@@ -109,6 +110,7 @@
94 'http://localhost:9000/'))
95 self.assertEqual(report['req_vars'][2], ('name=foo', 'hello\nworld'))
96 self.assertEqual(len(report['db_statements']), 2)
97+ self.assertEqual(report['tb_text'], 'traceback-text\n foo/bar')
98
99 def test_read_no_store_id(self):
100 """Test ErrorReport.read() for old logs with no store_id."""

Subscribers

People subscribed via source and target branches

to all changes: