Merge lp:~vila/udd/analyze_log into lp:udd

Proposed by Vincent Ladeuil
Status: Merged
Merged at revision: 398
Proposed branch: lp:~vila/udd/analyze_log
Merge into: lp:udd
Diff against target: 364 lines (+360/-0)
1 file modified
analyze_log.py (+360/-0)
To merge this branch: bzr merge lp:~vila/udd/analyze_log
Reviewer Review Type Date Requested Status
Martin Pool Approve
Review via email: mp+49775@code.launchpad.net

Description of the change

I did some log analysis with this script and thought it can help us in the future.

It will probably have a short live for some parts of it once we can provide more data from the imports themselves but in the mean time there is no real value to keep it private.

To post a comment you must log in.
Revision history for this message
Martin Pool (mbp) wrote :

Please mention it in a HACKING file so that people can find it in future.

review: Approve
lp:~vila/udd/analyze_log updated
398. By Vincent Ladeuil

Add analyze_log to summarize past activities

Revision history for this message
Vincent Ladeuil (vila) wrote :

>>>>> Martin Pool <email address hidden> writes:

    > Review: Approve
    > Please mention it in a HACKING file so that people can find it in future.

Done: https://wiki.ubuntu.com/DistributedDevelopment/UnderTheHood/Importer/Operational

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== added file 'analyze_log.py'
2--- analyze_log.py 1970-01-01 00:00:00 +0000
3+++ analyze_log.py 2011-02-15 09:17:57 +0000
4@@ -0,0 +1,360 @@
5+#!/usr/bin/python
6+"""Analyzes the package importer log files collecting various historical data.
7+
8+This may need to be tweaked as the logger usage evolves.
9+"""
10+
11+import datetime
12+import errno
13+import operator
14+import os
15+import re
16+import sys
17+import time
18+
19+sys.path.insert(0, os.path.dirname(__file__))
20+import icommon
21+
22+
23+class LoggerLineMatcher(object):
24+
25+ actor = None
26+
27+ def __init__(self, actor, ident, regexp, args=None):
28+ """Logger line scanner helper.
29+
30+ :param actor: Uniquely identify an actor in the log file.
31+
32+ :param ident: Uniquely identify a logger line type for agiven ``actor``.
33+
34+ :param regexp: Regular expression the message should match.
35+
36+ :param args: A list a indices in the groups() matched, each
37+ corresponding group will be added in the order given in the
38+ details().
39+ """
40+ self.actor = actor
41+ self.ident = ident
42+ self.regexp = re.compile(regexp)
43+ self.args = args
44+
45+ def matches(self, msg):
46+ self.match = self.regexp.match(msg)
47+ return self.match is not None
48+
49+
50+class ActorParser(object):
51+ """A parser for an actor defined by the lines it emits in a log file."""
52+
53+ def __init__(self, ident):
54+ self.ident = ident
55+ self.start = None # datetime.datetime object
56+ self.end = None # datetime.datetime object
57+ self.duration = None #datetime.timedelta object
58+
59+ def see(self, state, stamp, args):
60+ raise NotImplementedError(self.see)
61+
62+ def dead(self):
63+ return self.end is not None
64+
65+ def round_time(self, t):
66+ return t - datetime.timedelta(microseconds=t.microsecond)
67+
68+ def round_delta(self, d):
69+ return d - datetime.timedelta(microseconds=d.microseconds)
70+
71+ def report_key(self):
72+ return (self.end, self.start)
73+
74+ def report_times(self, out, epoch=None, apocalypse=None):
75+ estimate = False
76+ if self.start is None:
77+ estimate = True
78+ self.start = epoch
79+ out.write('%s%s' % ([' ', '<'][estimate],
80+ self.round_time(self.start)))
81+ if self.end is None:
82+ estimate = True
83+ self.end = apocalypse
84+ duration = self.end - self.start
85+ if duration.seconds >= 10:
86+ # Lost in the noise
87+ duration = self.round_delta(duration)
88+ self.duration = duration
89+ out.write(' %s%s' % ([' ', '>'][estimate], duration))
90+ out.write(' %s\n' % (self.ident,))
91+
92+
93+class ControllerParser(ActorParser):
94+
95+ def __init__(self):
96+ super(ControllerParser, self).__init__('controller')
97+ self.nb_packages = 0
98+ self.cumulated_duration = datetime.timedelta(0)
99+ self.longest = None
100+ self.longest_duration = datetime.timedelta(0)
101+
102+ def see(self, state, stamp, args):
103+ if state == 'starting':
104+ self.start = stamp
105+ elif state == 'signaled':
106+ pass
107+ elif state == 'waiting':
108+ pass
109+ else:
110+ assert state == 'finished'
111+ self.end = stamp
112+
113+ def package_ends(self, pack):
114+ if pack.duration > self.longest_duration:
115+ self.longest = pack.ident
116+ self.longest_duration = pack.duration
117+ self.cumulated_duration += pack.duration
118+ self.nb_packages += 1
119+
120+ def report_times(self, out, epoch=None, apocalypse=None):
121+ super(ControllerParser, self).report_times(out, epoch, apocalypse)
122+ if self.nb_packages == 0:
123+ self.nb_packages = 1 # avoid divide by zero
124+ out.write(' imported %6d, average %10s, longest: %10s %s\n'
125+ % (self.nb_packages, self.round_delta(self.cumulated_duration
126+ / self.nb_packages),
127+ self.round_delta(self.longest_duration), self.longest,
128+ ))
129+
130+
131+class DriverParser(ActorParser):
132+
133+ def __init__(self):
134+ super(DriverParser, self).__init__('driver')
135+ self.concurrency = 0
136+
137+ def see(self, state, stamp, args):
138+ if state == 'starting':
139+ self.start = stamp
140+ elif state == 'active':
141+ concurrency = len(args[0].split(','))
142+ if concurrency != self.concurrency:
143+ self.concurrency = concurrency
144+ print '>>> %s - Concurrency is now: %d' % (stamp, concurrency,)
145+ elif state in ('asked_to_stop', 'crashed'):
146+ pass
147+ else:
148+ assert state in ('stopping', 'finished')
149+ self.end = stamp
150+
151+
152+class PackageParser(ActorParser):
153+
154+ def __init__(self, name):
155+ super(PackageParser, self).__init__(name)
156+ self.tb = None
157+
158+ def see(self, state, stamp, args):
159+ if state == 'trying':
160+ self.start = stamp
161+ elif state == 'locked':
162+ self.end = stamp
163+ elif state == 'finished':
164+ pass
165+ elif state == 'success':
166+ # There is something to grab in args there, distro, pocket, etc
167+ self.end = stamp
168+ else:
169+ assert state == 'failed'
170+ self.end = stamp
171+ # This is followed by "something" containing a
172+ # traceback, so we slurp all lines until the next valid
173+ # logger line
174+ log = args[0]
175+ something = []
176+ while True:
177+ lineno, line = log.next_line()
178+ # Trying to verify that ``line`` is a proper logger
179+ # line is far too expensive, so we try a cheap
180+ # substitute instead the initial date will start
181+ # with 20 for the current century.
182+ if line.startswith('20'):
183+ log.pushed_back_line = line
184+ break
185+ something.append(line)
186+ self.tb = ''.join(something)
187+
188+
189+class Log(object):
190+
191+ def __init__(self, path):
192+ self.path = path
193+ self.stream = iter(open(self.path))
194+ self.lineno = 0
195+ self.pushed_back_line = None
196+ # The first and last stamp seen in the log file
197+ self.epoch = None
198+ self.apocalypse = None
199+ # A logger line is:
200+ # <time stamp> - <component?> - level - msg
201+ self.logger_line_re = re.compile(
202+ '([0-9- :,]{23}) - ([^ -]+) - ([A-Z]+) - (.*)')
203+
204+ def Controller(ident, regexp):
205+ return LoggerLineMatcher('controller', ident, regexp)
206+
207+ def Driver(ident, regexp):
208+ return LoggerLineMatcher('driver', ident, regexp)
209+
210+ def Package(ident, regexp):
211+ return LoggerLineMatcher('package', ident, regexp)
212+
213+
214+ # All the possible lines, most often found first (as a cheap shot
215+ # optimization).
216+ self.matchers = [
217+ Package('trying', 'Trying ([^ ]+)'),
218+ Package('locked', "Couldn't lock ([^ ]+), skipping"),
219+ Package('success', 'Success ([^ ]+): (.*)'),
220+ Package('finished', 'thread for ([^ ]+) finished'),
221+ Package('failed', 'Importing ([^ ]+) failed:'),
222+ # Driver logger lines
223+ Driver('active', 'threads for \[([^\]]+)\] still active'),
224+ Driver('asked_to_stop', 'Driver asked to stop'),
225+ Driver('crashed', 'Driver hit (.*)'),
226+ Driver('stopping', 'Driver stopping'),
227+ Driver('finished', 'Driver finished: stopping'),
228+ # Controller logger lines
229+ Controller('starting', 'Starting up'),
230+ Controller('signaled', 'Received signal'),
231+ Controller('waiting', 'Waiting for driver to finish'),
232+ Controller('finished', 'Finished'),
233+ ]
234+ # At any given point in time, there could be one controller, one driver
235+ # and several packages writing to the log file. There is no guarantee
236+ # that any of them will have corresponding lines for starting or
237+ # finishing (not to mention dieing).
238+ self.controller = None
239+ self.driver = None
240+ self.package_parsers = {}
241+
242+ def scan_logger_line(self, stamp_str, component, level, msg):
243+ stamp = datetime.datetime.strptime(stamp_str[:-4], '%Y-%m-%d %H:%M:%S')
244+ # strptime can't parse the miliseconds, do it manually
245+ stamp = stamp.replace(microsecond=int(stamp_str[-3:])*1000)
246+ if self.epoch is None:
247+ self.epoch = stamp
248+ self.apocalypse = stamp
249+
250+ for m in self.matchers:
251+ if m.matches(msg):
252+ return (stamp, m.actor, m.ident, m.match.groups())
253+
254+ return (stamp, 'unknown', 'garbage', rest)
255+
256+ def next_line(self):
257+ if self.pushed_back_line is not None:
258+ # Give back the pushed back line without changing the lineno
259+ line = self.pushed_back_line
260+ self.pushed_back_line = None
261+ else:
262+ line = self.stream.next()
263+ self.lineno +=1
264+ return self.lineno, line
265+
266+ def scan(self):
267+ while True:
268+ lineno, line = self.next_line()
269+ res = self.logger_line_re.match(line)
270+ if res is not None:
271+ stamp, component, level, msg = res.groups()
272+ yield lineno, 'logger', self.scan_logger_line(
273+ stamp, component, level, msg)
274+ else:
275+ yield lineno, 'garbage', line
276+
277+ def get_controller(self):
278+ # autovivification
279+ if self.controller is None:
280+ self.controller = ControllerParser()
281+ return self.controller
282+
283+ def get_driver(self):
284+ # autovivification
285+ if self.driver is None:
286+ self.driver = DriverParser()
287+ return self.driver
288+
289+ def get_package_parser(self, name):
290+ try:
291+ p = self.package_parsers[name]
292+ except KeyError:
293+ # autovivification
294+ p = PackageParser(name)
295+ self.package_parsers[name] = p
296+ return p
297+
298+ def report_all(self):
299+ c = self.get_controller()
300+ packages = sorted(self.package_parsers.values(),
301+ key=operator.methodcaller('report_key'))
302+ for p in packages:
303+ # Sort by p.start (including None)
304+ p.report_times(sys.stdout, self.epoch, self.apocalypse)
305+ c.package_ends(p)
306+ self.get_driver().report_times(sys.stdout, self.epoch, self.apocalypse)
307+ c.report_times(sys.stdout, self.epoch, self.apocalypse)
308+
309+ def parse(self):
310+ # controller and driver always run together (there is a bug if the
311+ # controller is killed with a hammer, we don't care.
312+ self.get_controller()
313+ self.get_driver()
314+ try:
315+ for lineno, typ, args in self.scan():
316+ if typ == 'logger':
317+ (stamp, actor, ident,
318+ remaining) = args[0], args[1], args[2], args[3]
319+ if actor == 'controller':
320+ c = self.get_controller()
321+ c.see(ident, stamp, remaining)
322+ if c.dead():
323+ self.report_all()
324+ self.package_parsers = {}
325+ self.controller = self.driver = None
326+ elif actor == 'driver':
327+ d = self.get_driver()
328+ d.see(ident, stamp, remaining)
329+ elif actor == 'package':
330+ name = remaining[0]
331+ p = self.get_package_parser(name)
332+ p.see(ident, stamp, [self] + list(remaining[1:]))
333+ if p.dead():
334+ p.report_times(sys.stdout, self.epoch)
335+ self.get_controller().package_ends(p)
336+ del self.package_parsers[name]
337+ else:
338+ line = args
339+ if line != '\n': # Don't bother reporting empty lines
340+ # Unrecognized line type
341+ sys.stdout.write('%s:%s garbage: %s'
342+ % (self.path, lineno, args,))
343+ finally:
344+ self.stream.close()
345+ # Final reports can break with log files without a single logger line
346+ # (no time stamps seen, we don't care)
347+ self.report_all()
348+
349+def main(args):
350+ if len(args) != 2:
351+ print 'Usage: %s <log file path>'
352+ exit(1)
353+
354+ log_path = args[1]
355+ if not os.path.exists(log_path):
356+ print '%s does not exist' % (log_path,)
357+ exit(1)
358+
359+ log = Log(log_path)
360+ log.parse()
361+
362+
363+if __name__ == '__main__':
364+ main(sys.argv)

Subscribers

People subscribed via source and target branches