Merge lp:~vila/udd/analyze_log into lp:udd

Proposed by Vincent Ladeuil
Status: Merged
Merged at revision: 398
Proposed branch: lp:~vila/udd/analyze_log
Merge into: lp:udd
Diff against target: 364 lines (+360/-0)
1 file modified
analyze_log.py (+360/-0)
To merge this branch: bzr merge lp:~vila/udd/analyze_log
Reviewer Review Type Date Requested Status
Martin Pool Approve
Review via email: mp+49775@code.launchpad.net

Description of the change

I did some log analysis with this script and thought it can help us in the future.

It will probably have a short live for some parts of it once we can provide more data from the imports themselves but in the mean time there is no real value to keep it private.

To post a comment you must log in.
Revision history for this message
Martin Pool (mbp) wrote :

Please mention it in a HACKING file so that people can find it in future.

review: Approve
lp:~vila/udd/analyze_log updated
398. By Vincent Ladeuil

Add analyze_log to summarize past activities

Revision history for this message
Vincent Ladeuil (vila) wrote :

>>>>> Martin Pool <email address hidden> writes:

    > Review: Approve
    > Please mention it in a HACKING file so that people can find it in future.

Done: https://wiki.ubuntu.com/DistributedDevelopment/UnderTheHood/Importer/Operational

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== added file 'analyze_log.py'
--- analyze_log.py 1970-01-01 00:00:00 +0000
+++ analyze_log.py 2011-02-15 09:17:57 +0000
@@ -0,0 +1,360 @@
1#!/usr/bin/python
2"""Analyzes the package importer log files collecting various historical data.
3
4This may need to be tweaked as the logger usage evolves.
5"""
6
7import datetime
8import errno
9import operator
10import os
11import re
12import sys
13import time
14
15sys.path.insert(0, os.path.dirname(__file__))
16import icommon
17
18
19class LoggerLineMatcher(object):
20
21 actor = None
22
23 def __init__(self, actor, ident, regexp, args=None):
24 """Logger line scanner helper.
25
26 :param actor: Uniquely identify an actor in the log file.
27
28 :param ident: Uniquely identify a logger line type for agiven ``actor``.
29
30 :param regexp: Regular expression the message should match.
31
32 :param args: A list a indices in the groups() matched, each
33 corresponding group will be added in the order given in the
34 details().
35 """
36 self.actor = actor
37 self.ident = ident
38 self.regexp = re.compile(regexp)
39 self.args = args
40
41 def matches(self, msg):
42 self.match = self.regexp.match(msg)
43 return self.match is not None
44
45
46class ActorParser(object):
47 """A parser for an actor defined by the lines it emits in a log file."""
48
49 def __init__(self, ident):
50 self.ident = ident
51 self.start = None # datetime.datetime object
52 self.end = None # datetime.datetime object
53 self.duration = None #datetime.timedelta object
54
55 def see(self, state, stamp, args):
56 raise NotImplementedError(self.see)
57
58 def dead(self):
59 return self.end is not None
60
61 def round_time(self, t):
62 return t - datetime.timedelta(microseconds=t.microsecond)
63
64 def round_delta(self, d):
65 return d - datetime.timedelta(microseconds=d.microseconds)
66
67 def report_key(self):
68 return (self.end, self.start)
69
70 def report_times(self, out, epoch=None, apocalypse=None):
71 estimate = False
72 if self.start is None:
73 estimate = True
74 self.start = epoch
75 out.write('%s%s' % ([' ', '<'][estimate],
76 self.round_time(self.start)))
77 if self.end is None:
78 estimate = True
79 self.end = apocalypse
80 duration = self.end - self.start
81 if duration.seconds >= 10:
82 # Lost in the noise
83 duration = self.round_delta(duration)
84 self.duration = duration
85 out.write(' %s%s' % ([' ', '>'][estimate], duration))
86 out.write(' %s\n' % (self.ident,))
87
88
89class ControllerParser(ActorParser):
90
91 def __init__(self):
92 super(ControllerParser, self).__init__('controller')
93 self.nb_packages = 0
94 self.cumulated_duration = datetime.timedelta(0)
95 self.longest = None
96 self.longest_duration = datetime.timedelta(0)
97
98 def see(self, state, stamp, args):
99 if state == 'starting':
100 self.start = stamp
101 elif state == 'signaled':
102 pass
103 elif state == 'waiting':
104 pass
105 else:
106 assert state == 'finished'
107 self.end = stamp
108
109 def package_ends(self, pack):
110 if pack.duration > self.longest_duration:
111 self.longest = pack.ident
112 self.longest_duration = pack.duration
113 self.cumulated_duration += pack.duration
114 self.nb_packages += 1
115
116 def report_times(self, out, epoch=None, apocalypse=None):
117 super(ControllerParser, self).report_times(out, epoch, apocalypse)
118 if self.nb_packages == 0:
119 self.nb_packages = 1 # avoid divide by zero
120 out.write(' imported %6d, average %10s, longest: %10s %s\n'
121 % (self.nb_packages, self.round_delta(self.cumulated_duration
122 / self.nb_packages),
123 self.round_delta(self.longest_duration), self.longest,
124 ))
125
126
127class DriverParser(ActorParser):
128
129 def __init__(self):
130 super(DriverParser, self).__init__('driver')
131 self.concurrency = 0
132
133 def see(self, state, stamp, args):
134 if state == 'starting':
135 self.start = stamp
136 elif state == 'active':
137 concurrency = len(args[0].split(','))
138 if concurrency != self.concurrency:
139 self.concurrency = concurrency
140 print '>>> %s - Concurrency is now: %d' % (stamp, concurrency,)
141 elif state in ('asked_to_stop', 'crashed'):
142 pass
143 else:
144 assert state in ('stopping', 'finished')
145 self.end = stamp
146
147
148class PackageParser(ActorParser):
149
150 def __init__(self, name):
151 super(PackageParser, self).__init__(name)
152 self.tb = None
153
154 def see(self, state, stamp, args):
155 if state == 'trying':
156 self.start = stamp
157 elif state == 'locked':
158 self.end = stamp
159 elif state == 'finished':
160 pass
161 elif state == 'success':
162 # There is something to grab in args there, distro, pocket, etc
163 self.end = stamp
164 else:
165 assert state == 'failed'
166 self.end = stamp
167 # This is followed by "something" containing a
168 # traceback, so we slurp all lines until the next valid
169 # logger line
170 log = args[0]
171 something = []
172 while True:
173 lineno, line = log.next_line()
174 # Trying to verify that ``line`` is a proper logger
175 # line is far too expensive, so we try a cheap
176 # substitute instead the initial date will start
177 # with 20 for the current century.
178 if line.startswith('20'):
179 log.pushed_back_line = line
180 break
181 something.append(line)
182 self.tb = ''.join(something)
183
184
185class Log(object):
186
187 def __init__(self, path):
188 self.path = path
189 self.stream = iter(open(self.path))
190 self.lineno = 0
191 self.pushed_back_line = None
192 # The first and last stamp seen in the log file
193 self.epoch = None
194 self.apocalypse = None
195 # A logger line is:
196 # <time stamp> - <component?> - level - msg
197 self.logger_line_re = re.compile(
198 '([0-9- :,]{23}) - ([^ -]+) - ([A-Z]+) - (.*)')
199
200 def Controller(ident, regexp):
201 return LoggerLineMatcher('controller', ident, regexp)
202
203 def Driver(ident, regexp):
204 return LoggerLineMatcher('driver', ident, regexp)
205
206 def Package(ident, regexp):
207 return LoggerLineMatcher('package', ident, regexp)
208
209
210 # All the possible lines, most often found first (as a cheap shot
211 # optimization).
212 self.matchers = [
213 Package('trying', 'Trying ([^ ]+)'),
214 Package('locked', "Couldn't lock ([^ ]+), skipping"),
215 Package('success', 'Success ([^ ]+): (.*)'),
216 Package('finished', 'thread for ([^ ]+) finished'),
217 Package('failed', 'Importing ([^ ]+) failed:'),
218 # Driver logger lines
219 Driver('active', 'threads for \[([^\]]+)\] still active'),
220 Driver('asked_to_stop', 'Driver asked to stop'),
221 Driver('crashed', 'Driver hit (.*)'),
222 Driver('stopping', 'Driver stopping'),
223 Driver('finished', 'Driver finished: stopping'),
224 # Controller logger lines
225 Controller('starting', 'Starting up'),
226 Controller('signaled', 'Received signal'),
227 Controller('waiting', 'Waiting for driver to finish'),
228 Controller('finished', 'Finished'),
229 ]
230 # At any given point in time, there could be one controller, one driver
231 # and several packages writing to the log file. There is no guarantee
232 # that any of them will have corresponding lines for starting or
233 # finishing (not to mention dieing).
234 self.controller = None
235 self.driver = None
236 self.package_parsers = {}
237
238 def scan_logger_line(self, stamp_str, component, level, msg):
239 stamp = datetime.datetime.strptime(stamp_str[:-4], '%Y-%m-%d %H:%M:%S')
240 # strptime can't parse the miliseconds, do it manually
241 stamp = stamp.replace(microsecond=int(stamp_str[-3:])*1000)
242 if self.epoch is None:
243 self.epoch = stamp
244 self.apocalypse = stamp
245
246 for m in self.matchers:
247 if m.matches(msg):
248 return (stamp, m.actor, m.ident, m.match.groups())
249
250 return (stamp, 'unknown', 'garbage', rest)
251
252 def next_line(self):
253 if self.pushed_back_line is not None:
254 # Give back the pushed back line without changing the lineno
255 line = self.pushed_back_line
256 self.pushed_back_line = None
257 else:
258 line = self.stream.next()
259 self.lineno +=1
260 return self.lineno, line
261
262 def scan(self):
263 while True:
264 lineno, line = self.next_line()
265 res = self.logger_line_re.match(line)
266 if res is not None:
267 stamp, component, level, msg = res.groups()
268 yield lineno, 'logger', self.scan_logger_line(
269 stamp, component, level, msg)
270 else:
271 yield lineno, 'garbage', line
272
273 def get_controller(self):
274 # autovivification
275 if self.controller is None:
276 self.controller = ControllerParser()
277 return self.controller
278
279 def get_driver(self):
280 # autovivification
281 if self.driver is None:
282 self.driver = DriverParser()
283 return self.driver
284
285 def get_package_parser(self, name):
286 try:
287 p = self.package_parsers[name]
288 except KeyError:
289 # autovivification
290 p = PackageParser(name)
291 self.package_parsers[name] = p
292 return p
293
294 def report_all(self):
295 c = self.get_controller()
296 packages = sorted(self.package_parsers.values(),
297 key=operator.methodcaller('report_key'))
298 for p in packages:
299 # Sort by p.start (including None)
300 p.report_times(sys.stdout, self.epoch, self.apocalypse)
301 c.package_ends(p)
302 self.get_driver().report_times(sys.stdout, self.epoch, self.apocalypse)
303 c.report_times(sys.stdout, self.epoch, self.apocalypse)
304
305 def parse(self):
306 # controller and driver always run together (there is a bug if the
307 # controller is killed with a hammer, we don't care.
308 self.get_controller()
309 self.get_driver()
310 try:
311 for lineno, typ, args in self.scan():
312 if typ == 'logger':
313 (stamp, actor, ident,
314 remaining) = args[0], args[1], args[2], args[3]
315 if actor == 'controller':
316 c = self.get_controller()
317 c.see(ident, stamp, remaining)
318 if c.dead():
319 self.report_all()
320 self.package_parsers = {}
321 self.controller = self.driver = None
322 elif actor == 'driver':
323 d = self.get_driver()
324 d.see(ident, stamp, remaining)
325 elif actor == 'package':
326 name = remaining[0]
327 p = self.get_package_parser(name)
328 p.see(ident, stamp, [self] + list(remaining[1:]))
329 if p.dead():
330 p.report_times(sys.stdout, self.epoch)
331 self.get_controller().package_ends(p)
332 del self.package_parsers[name]
333 else:
334 line = args
335 if line != '\n': # Don't bother reporting empty lines
336 # Unrecognized line type
337 sys.stdout.write('%s:%s garbage: %s'
338 % (self.path, lineno, args,))
339 finally:
340 self.stream.close()
341 # Final reports can break with log files without a single logger line
342 # (no time stamps seen, we don't care)
343 self.report_all()
344
345def main(args):
346 if len(args) != 2:
347 print 'Usage: %s <log file path>'
348 exit(1)
349
350 log_path = args[1]
351 if not os.path.exists(log_path):
352 print '%s does not exist' % (log_path,)
353 exit(1)
354
355 log = Log(log_path)
356 log.parse()
357
358
359if __name__ == '__main__':
360 main(sys.argv)

Subscribers

People subscribed via source and target branches