Merge into trunk : improve-db-statement-matches : Code : Python OOPS Date-dir repository

Status:	Merged
Approved by:	Diogo Matsubara on 2011-08-26
Approved revision:	10
Merged at revision:	10
Proposed branch:	lp:~matsubara/python-oops-datedir-repo/improve-db-statement-matches
Merge into:	lp:python-oops-datedir-repo
Diff against target:	100 lines (+18/-19) 2 files modified oops_datedir_repo/serializer_rfc822.py (+14/-17) oops_datedir_repo/tests/test_serializer_rfc822.py (+4/-2)
To merge this branch:	bzr merge lp:~matsubara/python-oops-datedir-repo/improve-db-statement-matches
Related bugs:	Link a bug report

Reviewer	Review Type	Date Requested	Status
Abel Deuring (community)	code	2011-08-26	Approve on 2011-08-26
Review via email: mp+73059@code.launchpad.net

Description of the change

This branch fixes a couple of small things:

- improves how request variables and db statements are identified by the parser. Before, it'd allow db statements to be read as if they were request variables. The check for request variables is now a bit more precise.

- Duration got from oops report file is now parsed as a float rather than as an int.

- The first traceback line is preserved avoiding a traceback without a newline character in its first line.

Revision history for this message

Abel Deuring (adeuring) wrote on 2011-08-26:

#

Hi Diogo,

the branch looks good.

Just a minor suggestion:

> === modified file 'oops_datedir_repo/serializer_rfc822.py'
> --- oops_datedir_repo/serializer_rfc822.py 2011-08-25 20:53:24 +0000
> +++ oops_datedir_repo/serializer_rfc822.py 2011-08-26 14:17:14 +0000
> @@ -70,7 +70,7 @@
> exc_value = msg.getheader('exception-value')
> datestr = msg.getheader('date')
> if datestr is not None:
> - date =iso8601.parse_date(msg.getheader('date'))
> + date = iso8601.parse_date(msg.getheader('date'))
> else:
> date = None
> topic = msg.getheader('topic')
> @@ -79,9 +79,9 @@
> username = msg.getheader('user')
> url = msg.getheader('url')
> try:
> - duration = int(float(msg.getheader('duration', '-1')))
> + duration = float(msg.getheader('duration', '-1'))
> except ValueError:
> - duration = -1
> + duration = float(-1)
> informational = msg.getheader('informational')
> branch_nick = msg.getheader('branch')
> revno = msg.getheader('revision')
> @@ -94,28 +94,27 @@
> lines = iter(msg.fp)
>
> def is_req_var(line):
> - return "=" in line
> + return "=" in line and not is_statement(line)
>
> + statement_pat = re.compile(r'^(\d+)-(\d+)(?:@([\w-]+))?\s+(.*)')
> def is_statement(line):
> - return re.match(r'^(\d+)-(\d+)(?:@([\w-]+))?\s+(.*)', line)
> + return statement_pat.match(line)

is_statement() returns a match object or None, from statement_pat.match()...

>
> def is_traceback(line):
> - return 'traceback' in line.lower() or '== EXTRA DATA ==' in line
> + return line.lower().startswith('traceback') or line.startswith(
> + '== EXTRA DATA ==')
>
> req_vars = []
> statements = []
> first_tb_line = ''
> for line in lines:
> + first_tb_line = line
> line = line.strip()
> if line == '':
> continue
> else:
> - if is_req_var(line):
> - key, value = line.split('=', 1)
> - req_vars.append((urllib.unquote(key), urllib.unquote(value)))
> - elif is_statement(line):
> - match = re.match(
> - r'^(\d+)-(\d+)(?:@([\w-]+))?\s+(.*)', line)
> + if is_statement(line):
> + match = statement_pat.match(line)
> assert match is not None, (
> "Unable to interpret oops line: %s" % line)

... and here, statement_pat.match() is called again. I think
you don't need is_statement() at all: Something like

     match = statement_pat.match(line)
     if match is not None:
         start, end, db_id, statement = match.groups()

would be "good enough" :)

Hi Diogo,

the branch looks good.

Just a minor suggestion:

> === modified file 'oops_datedir_repo/serializer_rfc822.py'
> --- oops_datedir_repo/serializer_rfc822.py	2011-08-25 20:53:24 +0000
> +++ oops_datedir_repo/serializer_rfc822.py	2011-08-26 14:17:14 +0000
> @@ -70,7 +70,7 @@
>      exc_value = msg.getheader('exception-value')
>      datestr = msg.getheader('date')
>      if datestr is not None:
> -        date =iso8601.parse_date(msg.getheader('date'))
> +        date = iso8601.parse_date(msg.getheader('date'))
>      else:
>          date = None
>      topic = msg.getheader('topic')
> @@ -79,9 +79,9 @@
>      username = msg.getheader('user')
>      url = msg.getheader('url')
>      try:
> -        duration = int(float(msg.getheader('duration', '-1')))
> +        duration = float(msg.getheader('duration', '-1'))
>      except ValueError:
> -        duration = -1
> +        duration = float(-1)
>      informational = msg.getheader('informational')
>      branch_nick = msg.getheader('branch')
>      revno = msg.getheader('revision')
> @@ -94,28 +94,27 @@
>      lines = iter(msg.fp)
>  
>      def is_req_var(line):
> -        return "=" in line
> +        return "=" in line and not is_statement(line)
>  
> +    statement_pat = re.compile(r'^(\d+)-(\d+)(?:@([\w-]+))?\s+(.*)')
>      def is_statement(line):
> -        return re.match(r'^(\d+)-(\d+)(?:@([\w-]+))?\s+(.*)', line)
> +        return statement_pat.match(line)

is_statement() returns a match object or None, from statement_pat.match()...

>  
>      def is_traceback(line):
> -        return 'traceback' in line.lower() or '== EXTRA DATA ==' in line
> +        return line.lower().startswith('traceback') or line.startswith(
> +            '== EXTRA DATA ==')
>  
>      req_vars = []
>      statements = []
>      first_tb_line = ''
>      for line in lines:
> +        first_tb_line = line
>          line = line.strip()
>          if line == '':
>              continue
>          else:
> -            if is_req_var(line):
> -                key, value = line.split('=', 1)
> -                req_vars.append((urllib.unquote(key), urllib.unquote(value)))
> -            elif is_statement(line):
> -                match = re.match(
> -                    r'^(\d+)-(\d+)(?:@([\w-]+))?\s+(.*)', line)
> +            if is_statement(line):
> +                match = statement_pat.match(line)
>                  assert match is not None, (
>                      "Unable to interpret oops line: %s" % line)

... and here, statement_pat.match() is called again. I think
you don't need is_statement() at all: Something like

match = statement_pat.match(line)
     if match is not None:
         start, end, db_id, statement = match.groups()

would be "good enough" :)

review: Approve (code)

lp:~matsubara/python-oops-datedir-repo/improve-db-statement-matches updated on 2011-08-26

11. By Diogo Matsubara on 2011-08-26: review comments

Revision history for this message

Diogo Matsubara (matsubara) wrote on 2011-08-26:

#

Thanks Abel, applied your suggestions!

1	=== modified file 'oops_datedir_repo/serializer_rfc822.py'
2	--- oops_datedir_repo/serializer_rfc822.py 2011-08-25 20:53:24 +0000
3	+++ oops_datedir_repo/serializer_rfc822.py 2011-08-26 15:01:46 +0000
4	@@ -70,7 +70,7 @@
5	exc_value = msg.getheader('exception-value')
6	datestr = msg.getheader('date')
7	if datestr is not None:
8	- date =iso8601.parse_date(msg.getheader('date'))
9	+ date = iso8601.parse_date(msg.getheader('date'))
10	else:
11	date = None
12	topic = msg.getheader('topic')
13	@@ -79,9 +79,9 @@
14	username = msg.getheader('user')
15	url = msg.getheader('url')
16	try:
17	- duration = int(float(msg.getheader('duration', '-1')))
18	+ duration = float(msg.getheader('duration', '-1'))
19	except ValueError:
20	- duration = -1
21	+ duration = float(-1)
22	informational = msg.getheader('informational')
23	branch_nick = msg.getheader('branch')
24	revno = msg.getheader('revision')
25	@@ -93,38 +93,35 @@
26	# support iteration.
27	lines = iter(msg.fp)
28
29	+ statement_pat = re.compile(r'^(\d+)-(\d+)(?:@([\w-]+))?\s+(.*)')
30	+
31	def is_req_var(line):
32	- return "=" in line
33	-
34	- def is_statement(line):
35	- return re.match(r'^(\d+)-(\d+)(?:@([\w-]+))?\s+(.*)', line)
36	+ return "=" in line and not statement_pat.match(line)
37
38	def is_traceback(line):
39	- return 'traceback' in line.lower() or '== EXTRA DATA ==' in line
40	+ return line.lower().startswith('traceback') or line.startswith(
41	+ '== EXTRA DATA ==')
42
43	req_vars = []
44	statements = []
45	first_tb_line = ''
46	for line in lines:
47	+ first_tb_line = line
48	line = line.strip()
49	if line == '':
50	continue
51	else:
52	- if is_req_var(line):
53	- key, value = line.split('=', 1)
54	- req_vars.append((urllib.unquote(key), urllib.unquote(value)))
55	- elif is_statement(line):
56	- match = re.match(
57	- r'^(\d+)-(\d+)(?:@([\w-]+))?\s+(.*)', line)
58	- assert match is not None, (
59	- "Unable to interpret oops line: %s" % line)
60	+ match = statement_pat.match(line)
61	+ if match is not None:
62	start, end, db_id, statement = match.groups()
63	if db_id is not None:
64	db_id = intern(db_id) # This string is repeated lots.
65	statements.append(
66	(int(start), int(end), db_id, statement))
67	+ elif is_req_var(line):
68	+ key, value = line.split('=', 1)
69	+ req_vars.append((urllib.unquote(key), urllib.unquote(value)))
70	elif is_traceback(line):
71	- first_tb_line = line
72	break
73
74	# The rest is traceback.
75
76	=== modified file 'oops_datedir_repo/tests/test_serializer_rfc822.py'
77	--- oops_datedir_repo/tests/test_serializer_rfc822.py 2011-08-25 20:53:24 +0000
78	+++ oops_datedir_repo/tests/test_serializer_rfc822.py 2011-08-26 15:01:46 +0000
79	@@ -97,10 +97,11 @@
80	HTTP_REFERER=http://localhost:9000/
81	name%3Dfoo=hello%0Aworld
82
83	- 00001-00005@store_a SELECT 1
84	+ 00001-00005@store_a SELECT 1 = 2
85	00005-00010@store_b SELECT 2
86
87	- traceback-text"""))
88	+ traceback-text
89	+ foo/bar"""))
90	report = read(fp)
91	self.assertEqual(report['id'], 'OOPS-A0001')
92	self.assertEqual(report['req_vars'][0], ('HTTP_USER_AGENT',
93	@@ -109,6 +110,7 @@
94	'http://localhost:9000/'))
95	self.assertEqual(report['req_vars'][2], ('name=foo', 'hello\nworld'))
96	self.assertEqual(len(report['db_statements']), 2)
97	+ self.assertEqual(report['tb_text'], 'traceback-text\n foo/bar')
98
99	def test_read_no_store_id(self):
100	"""Test ErrorReport.read() for old logs with no store_id."""

Python OOPS Date-dir repository

Merge lp:~matsubara/python-oops-datedir-repo/improve-db-statement-matches into lp:python-oops-datedir-repo

Commit message

Description of the change

Preview Diff

Subscribers