Merge lp:~cjwatson/launchpad/sorted-apache-logs-by-age into lp:launchpad

Proposed by Colin Watson
Status: Merged
Merged at revision: 18475
Proposed branch: lp:~cjwatson/launchpad/sorted-apache-logs-by-age
Merge into: lp:launchpad
Diff against target: 79 lines (+22/-3)
3 files modified
lib/lp/services/apachelogparser/base.py (+2/-1)
lib/lp/services/apachelogparser/script.py (+1/-1)
lib/lp/services/apachelogparser/tests/test_apachelogparser.py (+19/-1)
To merge this branch: bzr merge lp:~cjwatson/launchpad/sorted-apache-logs-by-age
Reviewer Review Type Date Requested Status
William Grant code Approve
Review via email: mp+331890@code.launchpad.net

Commit message

Parse Apache log files in ascending order of mtime.

Description of the change

It turns out that sorting by name isn't ideal when there's a large backlog, because we always end up working on the large ppa.launchpad.net-access.log first. It would make more sense to work on the oldest files first, since they're most likely to be lost to log rotation soon.

To post a comment you must log in.
Revision history for this message
William Grant (wgrant) :
review: Approve (code)

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/lp/services/apachelogparser/base.py'
2--- lib/lp/services/apachelogparser/base.py 2015-10-14 15:22:01 +0000
3+++ lib/lp/services/apachelogparser/base.py 2017-10-05 19:07:01 +0000
4@@ -1,4 +1,4 @@
5-# Copyright 2009 Canonical Ltd. This software is licensed under the
6+# Copyright 2009-2017 Canonical Ltd. This software is licensed under the
7 # GNU Affero General Public License version 3 (see the file LICENSE).
8
9 from datetime import datetime
10@@ -31,6 +31,7 @@
11 :param file_paths: The paths to the files.
12 """
13 store = IStore(ParsedApacheLog)
14+ file_paths = sorted(file_paths, key=lambda path: os.stat(path).st_mtime)
15 for file_path in file_paths:
16 fd, file_size = get_fd_and_file_size(file_path)
17 first_line = unicode(fd.readline())
18
19=== modified file 'lib/lp/services/apachelogparser/script.py'
20--- lib/lp/services/apachelogparser/script.py 2017-10-04 18:48:36 +0000
21+++ lib/lp/services/apachelogparser/script.py 2017-10-05 19:07:01 +0000
22@@ -77,7 +77,7 @@
23 # disappears before we get around to parsing it, which is
24 # desirable behaviour.
25 files_to_parse = list(get_files_to_parse(
26- sorted(glob.glob(os.path.join(self.root, self.log_file_glob)))))
27+ glob.glob(os.path.join(self.root, self.log_file_glob))))
28
29 country_set = getUtility(ICountrySet)
30 parsed_lines = 0
31
32=== modified file 'lib/lp/services/apachelogparser/tests/test_apachelogparser.py'
33--- lib/lp/services/apachelogparser/tests/test_apachelogparser.py 2015-10-14 15:22:01 +0000
34+++ lib/lp/services/apachelogparser/tests/test_apachelogparser.py 2017-10-05 19:07:01 +0000
35@@ -1,4 +1,4 @@
36-# Copyright 2009-2011 Canonical Ltd. This software is licensed under the
37+# Copyright 2009-2017 Canonical Ltd. This software is licensed under the
38 # GNU Affero General Public License version 3 (see the file LICENSE).
39
40 from datetime import datetime
41@@ -7,6 +7,9 @@
42 import os
43 from StringIO import StringIO
44 import tempfile
45+import time
46+
47+from fixtures import TempDir
48
49 from lp.services.apachelogparser.base import (
50 create_or_update_parsedlog_entry,
51@@ -22,6 +25,7 @@
52 from lp.services.database.interfaces import IStore
53 from lp.services.librarianserver.apachelogparser import DBUSER
54 from lp.services.log.logger import BufferLogger
55+from lp.services.osutils import write_file
56 from lp.testing import TestCase
57 from lp.testing.dbuser import switch_dbuser
58 from lp.testing.layers import (
59@@ -385,6 +389,20 @@
60 super(TestParsedFilesDetection, self).setUp()
61 switch_dbuser(DBUSER)
62
63+ def test_sorts_by_mtime(self):
64+ # Files are sorted by ascending mtime.
65+ root = self.useFixture(TempDir()).path
66+ file_paths = [os.path.join(root, str(name)) for name in range(3)]
67+ now = time.time()
68+ for i, path in enumerate(file_paths):
69+ write_file(path, '%s\n' % i)
70+ os.utime(path, (now - i, now - i))
71+ contents = []
72+ for fd, _ in get_files_to_parse(file_paths):
73+ fd.seek(0)
74+ contents.append(fd.read())
75+ self.assertEqual(['2\n', '1\n', '0\n'], contents)
76+
77 def test_not_parsed_file(self):
78 # A file that has never been parsed will have to be parsed from the
79 # start.