Merge lp:~stub/launchpad/session-prune into lp:launchpad/db-devel

Proposed by Stuart Bishop
Status: Merged
Approved by: Stuart Bishop
Approved revision: no longer in the source branch.
Merged at revision: 10275
Proposed branch: lp:~stub/launchpad/session-prune
Merge into: lp:launchpad/db-devel
Diff against target: 206 lines (+106/-36)
2 files modified
lib/lp/scripts/garbo.py (+29/-16)
lib/lp/scripts/tests/test_garbo.py (+77/-20)
To merge this branch: bzr merge lp:~stub/launchpad/session-prune
Reviewer Review Type Date Requested Status
Graham Binns (community) code Approve
Review via email: mp+52532@code.launchpad.net

This proposal supersedes a proposal from 2011-03-08.

Commit message

[r=gmb][bug=357516,729019] If a user has multiple authenticated sessions, prune all but the most recent 6.

Description of the change

If a user multiple authenticated sessions, remove all but the most recent 6.

Also, pull out extra_prune_clause from the BulkPruner, as it is currently only useful for pathalogical cases that don't really matter and it broke our end of loop detection (if all items of a batch are skipped due to the extra_prune_clause, then no rows would be deleted and the task would terminate). It would be possible to support this, but it will require refactoring and perform worse.

To post a comment you must log in.
Revision history for this message
Graham Binns (gmb) :
review: Approve (code)

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/lp/scripts/garbo.py'
2--- lib/lp/scripts/garbo.py 2011-03-03 15:19:07 +0000
3+++ lib/lp/scripts/garbo.py 2011-03-08 10:11:59 +0000
4@@ -124,12 +124,6 @@
5 # See `TunableLoop`. May be overridden.
6 maximum_chunk_size = 10000
7
8- # Optional extra WHERE clause fragment for the deletion to skip
9- # arbitrary rows flagged for deletion. For example, skip rows
10- # that might have been modified since the set of ids_to_prune
11- # was calculated.
12- extra_prune_clause = None
13-
14 def getStore(self):
15 """The master Store for the table we are pruning.
16
17@@ -156,20 +150,15 @@
18
19 def __call__(self, chunk_size):
20 """See `ITunableLoop`."""
21- if self.extra_prune_clause:
22- extra = "AND (%s)" % self.extra_prune_clause
23- else:
24- extra = ""
25 result = self.store.execute("""
26 DELETE FROM %s
27 WHERE %s IN (
28 SELECT id FROM
29 cursor_fetch('bulkprunerid', %d) AS f(id %s))
30- %s
31 """
32 % (
33 self.target_table_name, self.target_table_key,
34- chunk_size, self.target_table_key_type, extra))
35+ chunk_size, self.target_table_key_type))
36 self._num_removed = result.rowcount
37 transaction.commit()
38
39@@ -242,10 +231,33 @@
40 AND key='logintime')
41 """
42
43- # Don't delete a session if it has been used between calculating
44- # the list of sessions to remove and the current iteration.
45- prune_extra_clause = """
46- last_accessed < CURRENT_TIMESTAMP - CAST('1 day' AS interval)
47+
48+class DuplicateSessionPruner(SessionPruner):
49+ """Remove all but the most recent 6 authenticated sessions for a user.
50+
51+ We sometimes see users with dozens or thousands of authenticated
52+ sessions. To limit exposure to replay attacks, we remove all but
53+ the most recent 6 of them for a given user.
54+ """
55+
56+ ids_to_prune_query = """
57+ SELECT client_id AS id
58+ FROM (
59+ SELECT
60+ sessiondata.client_id,
61+ last_accessed,
62+ rank() OVER pickle AS rank
63+ FROM SessionData, SessionPkgData
64+ WHERE
65+ SessionData.client_id = SessionPkgData.client_id
66+ AND product_id = 'launchpad.authenticateduser'
67+ AND key='accountid'
68+ WINDOW pickle AS (PARTITION BY pickle ORDER BY last_accessed DESC)
69+ ) AS whatever
70+ WHERE
71+ rank > 6
72+ AND last_accessed < CURRENT_TIMESTAMP AT TIME ZONE 'UTC'
73+ - CAST('1 hour' AS interval)
74 """
75
76
77@@ -1042,6 +1054,7 @@
78 BugWatchScheduler,
79 AntiqueSessionPruner,
80 UnusedSessionPruner,
81+ DuplicateSessionPruner,
82 ]
83 experimental_tunable_loops = []
84
85
86=== modified file 'lib/lp/scripts/tests/test_garbo.py'
87--- lib/lp/scripts/tests/test_garbo.py 2011-03-04 11:13:32 +0000
88+++ lib/lp/scripts/tests/test_garbo.py 2011-03-08 10:11:59 +0000
89@@ -74,6 +74,7 @@
90 AntiqueSessionPruner,
91 BulkPruner,
92 DailyDatabaseGarbageCollector,
93+ DuplicateSessionPruner,
94 HourlyDatabaseGarbageCollector,
95 OpenIDConsumerAssociationPruner,
96 UnusedSessionPruner,
97@@ -205,26 +206,37 @@
98 yesterday = recent - timedelta(days=1)
99 ancient = recent - timedelta(days=61)
100
101- def make_session(client_id, accessed, authenticated=None):
102- session_data = SessionData()
103- session_data.client_id = client_id
104- session_data.last_accessed = accessed
105- IMasterStore(SessionData).add(session_data)
106-
107- if authenticated:
108- session_pkg_data = SessionPkgData()
109- session_pkg_data.client_id = client_id
110- session_pkg_data.product_id = u'launchpad.authenticateduser'
111- session_pkg_data.key = u'logintime'
112- session_pkg_data.pickle = 'value is ignored'
113- IMasterStore(SessionPkgData).add(session_pkg_data)
114-
115- make_session(u'recent_auth', recent, True)
116- make_session(u'recent_unauth', recent, False)
117- make_session(u'yesterday_auth', yesterday, True)
118- make_session(u'yesterday_unauth', yesterday, False)
119- make_session(u'ancient_auth', ancient, True)
120- make_session(u'ancient_unauth', ancient, False)
121+ self.make_session(u'recent_auth', recent, 'auth1')
122+ self.make_session(u'recent_unauth', recent, False)
123+ self.make_session(u'yesterday_auth', yesterday, 'auth2')
124+ self.make_session(u'yesterday_unauth', yesterday, False)
125+ self.make_session(u'ancient_auth', ancient, 'auth3')
126+ self.make_session(u'ancient_unauth', ancient, False)
127+
128+ def make_session(self, client_id, accessed, authenticated=None):
129+ session_data = SessionData()
130+ session_data.client_id = client_id
131+ session_data.last_accessed = accessed
132+ IMasterStore(SessionData).add(session_data)
133+
134+ if authenticated:
135+ # Add login time information.
136+ session_pkg_data = SessionPkgData()
137+ session_pkg_data.client_id = client_id
138+ session_pkg_data.product_id = u'launchpad.authenticateduser'
139+ session_pkg_data.key = u'logintime'
140+ session_pkg_data.pickle = 'value is ignored'
141+ IMasterStore(SessionPkgData).add(session_pkg_data)
142+
143+ # Add authenticated as information.
144+ session_pkg_data = SessionPkgData()
145+ session_pkg_data.client_id = client_id
146+ session_pkg_data.product_id = u'launchpad.authenticateduser'
147+ session_pkg_data.key = u'accountid'
148+ # Normally Account.id, but the session pruning works
149+ # at the SQL level and doesn't unpickle anything.
150+ session_pkg_data.pickle = authenticated
151+ IMasterStore(SessionPkgData).add(session_pkg_data)
152
153 def sessionExists(self, client_id):
154 store = IMasterStore(SessionData)
155@@ -279,6 +291,51 @@
156
157 self.assertEqual(expected_sessions, found_sessions)
158
159+ def test_duplicate_session_pruner(self):
160+ # None of the sessions created in setUp() are duplicates, so
161+ # they will all survive the pruning.
162+ expected_sessions = set([
163+ u'recent_auth',
164+ u'recent_unauth',
165+ u'yesterday_auth',
166+ u'yesterday_unauth',
167+ u'ancient_auth',
168+ u'ancient_unauth',
169+ ])
170+
171+ now = datetime.now(UTC)
172+
173+ # Make some duplicate logins from a few days ago.
174+ # Only the most recent 6 will be kept. Oldest is 'old dupe 9',
175+ # most recent 'old dupe 1'.
176+ for count in range(1, 10):
177+ self.make_session(
178+ u'old dupe %d' % count,
179+ now - timedelta(days=2, seconds=count),
180+ 'old dupe')
181+ for count in range(1, 7):
182+ expected_sessions.add(u'old dupe %d' % count)
183+
184+ # Make some other duplicate logins less than an hour old.
185+ # All of these will be kept.
186+ for count in range(1, 10):
187+ self.make_session(u'new dupe %d' % count, now, 'new dupe')
188+ expected_sessions.add(u'new dupe %d' % count)
189+
190+ chunk_size = 2
191+ log = BufferLogger()
192+ pruner = DuplicateSessionPruner(log)
193+ try:
194+ while not pruner.isDone():
195+ pruner(chunk_size)
196+ finally:
197+ pruner.cleanUp()
198+
199+ found_sessions = set(
200+ IMasterStore(SessionData).find(SessionData.client_id))
201+
202+ self.assertEqual(expected_sessions, found_sessions)
203+
204
205 class TestGarbo(TestCaseWithFactory):
206 layer = LaunchpadZopelessLayer

Subscribers

People subscribed via source and target branches

to status/vote changes: