Merge lp:~vila/udd/analyze_log into lp:udd
- analyze_log
- Merge into import-scripts
Proposed by
Vincent Ladeuil
Status: | Merged |
---|---|
Merged at revision: | 398 |
Proposed branch: | lp:~vila/udd/analyze_log |
Merge into: | lp:udd |
Diff against target: |
364 lines (+360/-0) 1 file modified
analyze_log.py (+360/-0) |
To merge this branch: | bzr merge lp:~vila/udd/analyze_log |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Martin Pool | Approve | ||
Review via email: mp+49775@code.launchpad.net |
Commit message
Description of the change
I did some log analysis with this script and thought it can help us in the future.
It will probably have a short live for some parts of it once we can provide more data from the imports themselves but in the mean time there is no real value to keep it private.
To post a comment you must log in.
lp:~vila/udd/analyze_log
updated
- 398. By Vincent Ladeuil
-
Add analyze_log to summarize past activities
Revision history for this message
Vincent Ladeuil (vila) wrote : | # |
>>>>> Martin Pool <email address hidden> writes:
> Review: Approve
> Please mention it in a HACKING file so that people can find it in future.
Done: https:/
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === added file 'analyze_log.py' |
2 | --- analyze_log.py 1970-01-01 00:00:00 +0000 |
3 | +++ analyze_log.py 2011-02-15 09:17:57 +0000 |
4 | @@ -0,0 +1,360 @@ |
5 | +#!/usr/bin/python |
6 | +"""Analyzes the package importer log files collecting various historical data. |
7 | + |
8 | +This may need to be tweaked as the logger usage evolves. |
9 | +""" |
10 | + |
11 | +import datetime |
12 | +import errno |
13 | +import operator |
14 | +import os |
15 | +import re |
16 | +import sys |
17 | +import time |
18 | + |
19 | +sys.path.insert(0, os.path.dirname(__file__)) |
20 | +import icommon |
21 | + |
22 | + |
23 | +class LoggerLineMatcher(object): |
24 | + |
25 | + actor = None |
26 | + |
27 | + def __init__(self, actor, ident, regexp, args=None): |
28 | + """Logger line scanner helper. |
29 | + |
30 | + :param actor: Uniquely identify an actor in the log file. |
31 | + |
32 | + :param ident: Uniquely identify a logger line type for agiven ``actor``. |
33 | + |
34 | + :param regexp: Regular expression the message should match. |
35 | + |
36 | + :param args: A list a indices in the groups() matched, each |
37 | + corresponding group will be added in the order given in the |
38 | + details(). |
39 | + """ |
40 | + self.actor = actor |
41 | + self.ident = ident |
42 | + self.regexp = re.compile(regexp) |
43 | + self.args = args |
44 | + |
45 | + def matches(self, msg): |
46 | + self.match = self.regexp.match(msg) |
47 | + return self.match is not None |
48 | + |
49 | + |
50 | +class ActorParser(object): |
51 | + """A parser for an actor defined by the lines it emits in a log file.""" |
52 | + |
53 | + def __init__(self, ident): |
54 | + self.ident = ident |
55 | + self.start = None # datetime.datetime object |
56 | + self.end = None # datetime.datetime object |
57 | + self.duration = None #datetime.timedelta object |
58 | + |
59 | + def see(self, state, stamp, args): |
60 | + raise NotImplementedError(self.see) |
61 | + |
62 | + def dead(self): |
63 | + return self.end is not None |
64 | + |
65 | + def round_time(self, t): |
66 | + return t - datetime.timedelta(microseconds=t.microsecond) |
67 | + |
68 | + def round_delta(self, d): |
69 | + return d - datetime.timedelta(microseconds=d.microseconds) |
70 | + |
71 | + def report_key(self): |
72 | + return (self.end, self.start) |
73 | + |
74 | + def report_times(self, out, epoch=None, apocalypse=None): |
75 | + estimate = False |
76 | + if self.start is None: |
77 | + estimate = True |
78 | + self.start = epoch |
79 | + out.write('%s%s' % ([' ', '<'][estimate], |
80 | + self.round_time(self.start))) |
81 | + if self.end is None: |
82 | + estimate = True |
83 | + self.end = apocalypse |
84 | + duration = self.end - self.start |
85 | + if duration.seconds >= 10: |
86 | + # Lost in the noise |
87 | + duration = self.round_delta(duration) |
88 | + self.duration = duration |
89 | + out.write(' %s%s' % ([' ', '>'][estimate], duration)) |
90 | + out.write(' %s\n' % (self.ident,)) |
91 | + |
92 | + |
93 | +class ControllerParser(ActorParser): |
94 | + |
95 | + def __init__(self): |
96 | + super(ControllerParser, self).__init__('controller') |
97 | + self.nb_packages = 0 |
98 | + self.cumulated_duration = datetime.timedelta(0) |
99 | + self.longest = None |
100 | + self.longest_duration = datetime.timedelta(0) |
101 | + |
102 | + def see(self, state, stamp, args): |
103 | + if state == 'starting': |
104 | + self.start = stamp |
105 | + elif state == 'signaled': |
106 | + pass |
107 | + elif state == 'waiting': |
108 | + pass |
109 | + else: |
110 | + assert state == 'finished' |
111 | + self.end = stamp |
112 | + |
113 | + def package_ends(self, pack): |
114 | + if pack.duration > self.longest_duration: |
115 | + self.longest = pack.ident |
116 | + self.longest_duration = pack.duration |
117 | + self.cumulated_duration += pack.duration |
118 | + self.nb_packages += 1 |
119 | + |
120 | + def report_times(self, out, epoch=None, apocalypse=None): |
121 | + super(ControllerParser, self).report_times(out, epoch, apocalypse) |
122 | + if self.nb_packages == 0: |
123 | + self.nb_packages = 1 # avoid divide by zero |
124 | + out.write(' imported %6d, average %10s, longest: %10s %s\n' |
125 | + % (self.nb_packages, self.round_delta(self.cumulated_duration |
126 | + / self.nb_packages), |
127 | + self.round_delta(self.longest_duration), self.longest, |
128 | + )) |
129 | + |
130 | + |
131 | +class DriverParser(ActorParser): |
132 | + |
133 | + def __init__(self): |
134 | + super(DriverParser, self).__init__('driver') |
135 | + self.concurrency = 0 |
136 | + |
137 | + def see(self, state, stamp, args): |
138 | + if state == 'starting': |
139 | + self.start = stamp |
140 | + elif state == 'active': |
141 | + concurrency = len(args[0].split(',')) |
142 | + if concurrency != self.concurrency: |
143 | + self.concurrency = concurrency |
144 | + print '>>> %s - Concurrency is now: %d' % (stamp, concurrency,) |
145 | + elif state in ('asked_to_stop', 'crashed'): |
146 | + pass |
147 | + else: |
148 | + assert state in ('stopping', 'finished') |
149 | + self.end = stamp |
150 | + |
151 | + |
152 | +class PackageParser(ActorParser): |
153 | + |
154 | + def __init__(self, name): |
155 | + super(PackageParser, self).__init__(name) |
156 | + self.tb = None |
157 | + |
158 | + def see(self, state, stamp, args): |
159 | + if state == 'trying': |
160 | + self.start = stamp |
161 | + elif state == 'locked': |
162 | + self.end = stamp |
163 | + elif state == 'finished': |
164 | + pass |
165 | + elif state == 'success': |
166 | + # There is something to grab in args there, distro, pocket, etc |
167 | + self.end = stamp |
168 | + else: |
169 | + assert state == 'failed' |
170 | + self.end = stamp |
171 | + # This is followed by "something" containing a |
172 | + # traceback, so we slurp all lines until the next valid |
173 | + # logger line |
174 | + log = args[0] |
175 | + something = [] |
176 | + while True: |
177 | + lineno, line = log.next_line() |
178 | + # Trying to verify that ``line`` is a proper logger |
179 | + # line is far too expensive, so we try a cheap |
180 | + # substitute instead the initial date will start |
181 | + # with 20 for the current century. |
182 | + if line.startswith('20'): |
183 | + log.pushed_back_line = line |
184 | + break |
185 | + something.append(line) |
186 | + self.tb = ''.join(something) |
187 | + |
188 | + |
189 | +class Log(object): |
190 | + |
191 | + def __init__(self, path): |
192 | + self.path = path |
193 | + self.stream = iter(open(self.path)) |
194 | + self.lineno = 0 |
195 | + self.pushed_back_line = None |
196 | + # The first and last stamp seen in the log file |
197 | + self.epoch = None |
198 | + self.apocalypse = None |
199 | + # A logger line is: |
200 | + # <time stamp> - <component?> - level - msg |
201 | + self.logger_line_re = re.compile( |
202 | + '([0-9- :,]{23}) - ([^ -]+) - ([A-Z]+) - (.*)') |
203 | + |
204 | + def Controller(ident, regexp): |
205 | + return LoggerLineMatcher('controller', ident, regexp) |
206 | + |
207 | + def Driver(ident, regexp): |
208 | + return LoggerLineMatcher('driver', ident, regexp) |
209 | + |
210 | + def Package(ident, regexp): |
211 | + return LoggerLineMatcher('package', ident, regexp) |
212 | + |
213 | + |
214 | + # All the possible lines, most often found first (as a cheap shot |
215 | + # optimization). |
216 | + self.matchers = [ |
217 | + Package('trying', 'Trying ([^ ]+)'), |
218 | + Package('locked', "Couldn't lock ([^ ]+), skipping"), |
219 | + Package('success', 'Success ([^ ]+): (.*)'), |
220 | + Package('finished', 'thread for ([^ ]+) finished'), |
221 | + Package('failed', 'Importing ([^ ]+) failed:'), |
222 | + # Driver logger lines |
223 | + Driver('active', 'threads for \[([^\]]+)\] still active'), |
224 | + Driver('asked_to_stop', 'Driver asked to stop'), |
225 | + Driver('crashed', 'Driver hit (.*)'), |
226 | + Driver('stopping', 'Driver stopping'), |
227 | + Driver('finished', 'Driver finished: stopping'), |
228 | + # Controller logger lines |
229 | + Controller('starting', 'Starting up'), |
230 | + Controller('signaled', 'Received signal'), |
231 | + Controller('waiting', 'Waiting for driver to finish'), |
232 | + Controller('finished', 'Finished'), |
233 | + ] |
234 | + # At any given point in time, there could be one controller, one driver |
235 | + # and several packages writing to the log file. There is no guarantee |
236 | + # that any of them will have corresponding lines for starting or |
237 | + # finishing (not to mention dieing). |
238 | + self.controller = None |
239 | + self.driver = None |
240 | + self.package_parsers = {} |
241 | + |
242 | + def scan_logger_line(self, stamp_str, component, level, msg): |
243 | + stamp = datetime.datetime.strptime(stamp_str[:-4], '%Y-%m-%d %H:%M:%S') |
244 | + # strptime can't parse the miliseconds, do it manually |
245 | + stamp = stamp.replace(microsecond=int(stamp_str[-3:])*1000) |
246 | + if self.epoch is None: |
247 | + self.epoch = stamp |
248 | + self.apocalypse = stamp |
249 | + |
250 | + for m in self.matchers: |
251 | + if m.matches(msg): |
252 | + return (stamp, m.actor, m.ident, m.match.groups()) |
253 | + |
254 | + return (stamp, 'unknown', 'garbage', rest) |
255 | + |
256 | + def next_line(self): |
257 | + if self.pushed_back_line is not None: |
258 | + # Give back the pushed back line without changing the lineno |
259 | + line = self.pushed_back_line |
260 | + self.pushed_back_line = None |
261 | + else: |
262 | + line = self.stream.next() |
263 | + self.lineno +=1 |
264 | + return self.lineno, line |
265 | + |
266 | + def scan(self): |
267 | + while True: |
268 | + lineno, line = self.next_line() |
269 | + res = self.logger_line_re.match(line) |
270 | + if res is not None: |
271 | + stamp, component, level, msg = res.groups() |
272 | + yield lineno, 'logger', self.scan_logger_line( |
273 | + stamp, component, level, msg) |
274 | + else: |
275 | + yield lineno, 'garbage', line |
276 | + |
277 | + def get_controller(self): |
278 | + # autovivification |
279 | + if self.controller is None: |
280 | + self.controller = ControllerParser() |
281 | + return self.controller |
282 | + |
283 | + def get_driver(self): |
284 | + # autovivification |
285 | + if self.driver is None: |
286 | + self.driver = DriverParser() |
287 | + return self.driver |
288 | + |
289 | + def get_package_parser(self, name): |
290 | + try: |
291 | + p = self.package_parsers[name] |
292 | + except KeyError: |
293 | + # autovivification |
294 | + p = PackageParser(name) |
295 | + self.package_parsers[name] = p |
296 | + return p |
297 | + |
298 | + def report_all(self): |
299 | + c = self.get_controller() |
300 | + packages = sorted(self.package_parsers.values(), |
301 | + key=operator.methodcaller('report_key')) |
302 | + for p in packages: |
303 | + # Sort by p.start (including None) |
304 | + p.report_times(sys.stdout, self.epoch, self.apocalypse) |
305 | + c.package_ends(p) |
306 | + self.get_driver().report_times(sys.stdout, self.epoch, self.apocalypse) |
307 | + c.report_times(sys.stdout, self.epoch, self.apocalypse) |
308 | + |
309 | + def parse(self): |
310 | + # controller and driver always run together (there is a bug if the |
311 | + # controller is killed with a hammer, we don't care. |
312 | + self.get_controller() |
313 | + self.get_driver() |
314 | + try: |
315 | + for lineno, typ, args in self.scan(): |
316 | + if typ == 'logger': |
317 | + (stamp, actor, ident, |
318 | + remaining) = args[0], args[1], args[2], args[3] |
319 | + if actor == 'controller': |
320 | + c = self.get_controller() |
321 | + c.see(ident, stamp, remaining) |
322 | + if c.dead(): |
323 | + self.report_all() |
324 | + self.package_parsers = {} |
325 | + self.controller = self.driver = None |
326 | + elif actor == 'driver': |
327 | + d = self.get_driver() |
328 | + d.see(ident, stamp, remaining) |
329 | + elif actor == 'package': |
330 | + name = remaining[0] |
331 | + p = self.get_package_parser(name) |
332 | + p.see(ident, stamp, [self] + list(remaining[1:])) |
333 | + if p.dead(): |
334 | + p.report_times(sys.stdout, self.epoch) |
335 | + self.get_controller().package_ends(p) |
336 | + del self.package_parsers[name] |
337 | + else: |
338 | + line = args |
339 | + if line != '\n': # Don't bother reporting empty lines |
340 | + # Unrecognized line type |
341 | + sys.stdout.write('%s:%s garbage: %s' |
342 | + % (self.path, lineno, args,)) |
343 | + finally: |
344 | + self.stream.close() |
345 | + # Final reports can break with log files without a single logger line |
346 | + # (no time stamps seen, we don't care) |
347 | + self.report_all() |
348 | + |
349 | +def main(args): |
350 | + if len(args) != 2: |
351 | + print 'Usage: %s <log file path>' |
352 | + exit(1) |
353 | + |
354 | + log_path = args[1] |
355 | + if not os.path.exists(log_path): |
356 | + print '%s does not exist' % (log_path,) |
357 | + exit(1) |
358 | + |
359 | + log = Log(log_path) |
360 | + log.parse() |
361 | + |
362 | + |
363 | +if __name__ == '__main__': |
364 | + main(sys.argv) |
Please mention it in a HACKING file so that people can find it in future.