Merge lp:~matsubara/python-oops-datedir-repo/improve-db-statement-matches into lp:python-oops-datedir-repo

Proposed by Diogo Matsubara
Status: Merged
Approved by: Diogo Matsubara
Approved revision: 10
Merged at revision: 10
Proposed branch: lp:~matsubara/python-oops-datedir-repo/improve-db-statement-matches
Merge into: lp:python-oops-datedir-repo
Diff against target: 100 lines (+18/-19)
2 files modified
oops_datedir_repo/serializer_rfc822.py (+14/-17)
oops_datedir_repo/tests/test_serializer_rfc822.py (+4/-2)
To merge this branch: bzr merge lp:~matsubara/python-oops-datedir-repo/improve-db-statement-matches
Reviewer Review Type Date Requested Status
Abel Deuring (community) code Approve
Review via email: mp+73059@code.launchpad.net

Description of the change

This branch fixes a couple of small things:

- improves how request variables and db statements are identified by the parser. Before, it'd allow db statements to be read as if they were request variables. The check for request variables is now a bit more precise.

- Duration got from oops report file is now parsed as a float rather than as an int.

- The first traceback line is preserved avoiding a traceback without a newline character in its first line.

To post a comment you must log in.
Revision history for this message
Abel Deuring (adeuring) wrote :

Hi Diogo,

the branch looks good.

Just a minor suggestion:

> === modified file 'oops_datedir_repo/serializer_rfc822.py'
> --- oops_datedir_repo/serializer_rfc822.py 2011-08-25 20:53:24 +0000
> +++ oops_datedir_repo/serializer_rfc822.py 2011-08-26 14:17:14 +0000
> @@ -70,7 +70,7 @@
> exc_value = msg.getheader('exception-value')
> datestr = msg.getheader('date')
> if datestr is not None:
> - date =iso8601.parse_date(msg.getheader('date'))
> + date = iso8601.parse_date(msg.getheader('date'))
> else:
> date = None
> topic = msg.getheader('topic')
> @@ -79,9 +79,9 @@
> username = msg.getheader('user')
> url = msg.getheader('url')
> try:
> - duration = int(float(msg.getheader('duration', '-1')))
> + duration = float(msg.getheader('duration', '-1'))
> except ValueError:
> - duration = -1
> + duration = float(-1)
> informational = msg.getheader('informational')
> branch_nick = msg.getheader('branch')
> revno = msg.getheader('revision')
> @@ -94,28 +94,27 @@
> lines = iter(msg.fp)
>
> def is_req_var(line):
> - return "=" in line
> + return "=" in line and not is_statement(line)
>
> + statement_pat = re.compile(r'^(\d+)-(\d+)(?:@([\w-]+))?\s+(.*)')
> def is_statement(line):
> - return re.match(r'^(\d+)-(\d+)(?:@([\w-]+))?\s+(.*)', line)
> + return statement_pat.match(line)

is_statement() returns a match object or None, from statement_pat.match()...

>
> def is_traceback(line):
> - return 'traceback' in line.lower() or '== EXTRA DATA ==' in line
> + return line.lower().startswith('traceback') or line.startswith(
> + '== EXTRA DATA ==')
>
> req_vars = []
> statements = []
> first_tb_line = ''
> for line in lines:
> + first_tb_line = line
> line = line.strip()
> if line == '':
> continue
> else:
> - if is_req_var(line):
> - key, value = line.split('=', 1)
> - req_vars.append((urllib.unquote(key), urllib.unquote(value)))
> - elif is_statement(line):
> - match = re.match(
> - r'^(\d+)-(\d+)(?:@([\w-]+))?\s+(.*)', line)
> + if is_statement(line):
> + match = statement_pat.match(line)
> assert match is not None, (
> "Unable to interpret oops line: %s" % line)

... and here, statement_pat.match() is called again. I think
you don't need is_statement() at all: Something like

     match = statement_pat.match(line)
     if match is not None:
         start, end, db_id, statement = match.groups()

would be "good enough" :)

review: Approve (code)
11. By Diogo Matsubara

review comments

Revision history for this message
Diogo Matsubara (matsubara) wrote :

Thanks Abel, applied your suggestions!

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'oops_datedir_repo/serializer_rfc822.py'
--- oops_datedir_repo/serializer_rfc822.py 2011-08-25 20:53:24 +0000
+++ oops_datedir_repo/serializer_rfc822.py 2011-08-26 15:01:46 +0000
@@ -70,7 +70,7 @@
70 exc_value = msg.getheader('exception-value')70 exc_value = msg.getheader('exception-value')
71 datestr = msg.getheader('date')71 datestr = msg.getheader('date')
72 if datestr is not None:72 if datestr is not None:
73 date =iso8601.parse_date(msg.getheader('date'))73 date = iso8601.parse_date(msg.getheader('date'))
74 else:74 else:
75 date = None75 date = None
76 topic = msg.getheader('topic')76 topic = msg.getheader('topic')
@@ -79,9 +79,9 @@
79 username = msg.getheader('user')79 username = msg.getheader('user')
80 url = msg.getheader('url')80 url = msg.getheader('url')
81 try:81 try:
82 duration = int(float(msg.getheader('duration', '-1')))82 duration = float(msg.getheader('duration', '-1'))
83 except ValueError:83 except ValueError:
84 duration = -184 duration = float(-1)
85 informational = msg.getheader('informational')85 informational = msg.getheader('informational')
86 branch_nick = msg.getheader('branch')86 branch_nick = msg.getheader('branch')
87 revno = msg.getheader('revision')87 revno = msg.getheader('revision')
@@ -93,38 +93,35 @@
93 # support iteration.93 # support iteration.
94 lines = iter(msg.fp)94 lines = iter(msg.fp)
9595
96 statement_pat = re.compile(r'^(\d+)-(\d+)(?:@([\w-]+))?\s+(.*)')
97
96 def is_req_var(line):98 def is_req_var(line):
97 return "=" in line99 return "=" in line and not statement_pat.match(line)
98
99 def is_statement(line):
100 return re.match(r'^(\d+)-(\d+)(?:@([\w-]+))?\s+(.*)', line)
101100
102 def is_traceback(line):101 def is_traceback(line):
103 return 'traceback' in line.lower() or '== EXTRA DATA ==' in line102 return line.lower().startswith('traceback') or line.startswith(
103 '== EXTRA DATA ==')
104104
105 req_vars = []105 req_vars = []
106 statements = []106 statements = []
107 first_tb_line = ''107 first_tb_line = ''
108 for line in lines:108 for line in lines:
109 first_tb_line = line
109 line = line.strip()110 line = line.strip()
110 if line == '':111 if line == '':
111 continue112 continue
112 else:113 else:
113 if is_req_var(line):114 match = statement_pat.match(line)
114 key, value = line.split('=', 1)115 if match is not None:
115 req_vars.append((urllib.unquote(key), urllib.unquote(value)))
116 elif is_statement(line):
117 match = re.match(
118 r'^(\d+)-(\d+)(?:@([\w-]+))?\s+(.*)', line)
119 assert match is not None, (
120 "Unable to interpret oops line: %s" % line)
121 start, end, db_id, statement = match.groups()116 start, end, db_id, statement = match.groups()
122 if db_id is not None:117 if db_id is not None:
123 db_id = intern(db_id) # This string is repeated lots.118 db_id = intern(db_id) # This string is repeated lots.
124 statements.append(119 statements.append(
125 (int(start), int(end), db_id, statement))120 (int(start), int(end), db_id, statement))
121 elif is_req_var(line):
122 key, value = line.split('=', 1)
123 req_vars.append((urllib.unquote(key), urllib.unquote(value)))
126 elif is_traceback(line):124 elif is_traceback(line):
127 first_tb_line = line
128 break125 break
129126
130 # The rest is traceback.127 # The rest is traceback.
131128
=== modified file 'oops_datedir_repo/tests/test_serializer_rfc822.py'
--- oops_datedir_repo/tests/test_serializer_rfc822.py 2011-08-25 20:53:24 +0000
+++ oops_datedir_repo/tests/test_serializer_rfc822.py 2011-08-26 15:01:46 +0000
@@ -97,10 +97,11 @@
97 HTTP_REFERER=http://localhost:9000/97 HTTP_REFERER=http://localhost:9000/
98 name%3Dfoo=hello%0Aworld98 name%3Dfoo=hello%0Aworld
9999
100 00001-00005@store_a SELECT 1100 00001-00005@store_a SELECT 1 = 2
101 00005-00010@store_b SELECT 2101 00005-00010@store_b SELECT 2
102102
103 traceback-text"""))103 traceback-text
104 foo/bar"""))
104 report = read(fp)105 report = read(fp)
105 self.assertEqual(report['id'], 'OOPS-A0001')106 self.assertEqual(report['id'], 'OOPS-A0001')
106 self.assertEqual(report['req_vars'][0], ('HTTP_USER_AGENT',107 self.assertEqual(report['req_vars'][0], ('HTTP_USER_AGENT',
@@ -109,6 +110,7 @@
109 'http://localhost:9000/'))110 'http://localhost:9000/'))
110 self.assertEqual(report['req_vars'][2], ('name=foo', 'hello\nworld'))111 self.assertEqual(report['req_vars'][2], ('name=foo', 'hello\nworld'))
111 self.assertEqual(len(report['db_statements']), 2)112 self.assertEqual(len(report['db_statements']), 2)
113 self.assertEqual(report['tb_text'], 'traceback-text\n foo/bar')
112114
113 def test_read_no_store_id(self):115 def test_read_no_store_id(self):
114 """Test ErrorReport.read() for old logs with no store_id."""116 """Test ErrorReport.read() for old logs with no store_id."""

Subscribers

People subscribed via source and target branches

to all changes: