Merge lp:~jtv/launchpad/faster-security into lp:launchpad

Proposed by Jeroen T. Vermeulen
Status: Merged
Approved by: Jeroen T. Vermeulen
Approved revision: no longer in the source branch.
Merge reported by: Jeroen T. Vermeulen
Merged at revision: not available
Proposed branch: lp:~jtv/launchpad/faster-security
Merge into: lp:launchpad
Diff against target: 384 lines (+182/-99)
1 file modified
database/schema/security.py (+182/-99)
To merge this branch: bzr merge lp:~jtv/launchpad/faster-security
Reviewer Review Type Date Requested Status
Stuart Bishop (community) Approve
Review via email: mp+52804@code.launchpad.net

Commit message

[r=stub][no-qa] Cleaner & more consistent GRANT/REVOKE bundling in security.py.

Description of the change

The code in security.py takes a bit too long and is hard to maintain. The latter is in part my fault: to speed it up in the past I introduced some very ad-hoc bundling of GRANT statements in order to reduce their number. On two test runs I did just now, the script took 6.6 and 8 seconds, respectively — better than it used to be but still not as good as I'd like.

In this branch I regularize the bundling of GRANT and REVOKE statements using a single, purpose-built class. Thus some of the code moves out of the vast stretches of reset_permissions code and some of the duplication goes away.

It also speeds things up a bit: both of the test runs I did just now took 3.9 seconds.

The bundling isn't "optimal." The API leaves room for optimizing it further, but I noticed two things:

1. There's not that much more to gain.

2. When I tried a more aggressive bundling setup that produced even fewer statements, the script actually got a lot slower.

But what's here should be pretty decent. It passes EC2.

Jeroen

To post a comment you must log in.
Revision history for this message
Stuart Bishop (stub) wrote :

This is a clear improvement. Thanks.

For the future, we can simplify this code further by dropping the USER/GROUP distinction and just using ROLES instead.

review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'database/schema/security.py'
2--- database/schema/security.py 2010-11-11 14:13:19 +0000
3+++ database/schema/security.py 2011-03-21 03:36:49 +0000
4@@ -19,7 +19,6 @@
5
6 from canonical.database.sqlbase import connect
7 from canonical.launchpad.scripts import logger_options, logger, db_options
8-from lp.services.log.loglevels import DEBUG2
9 from fti import quote_identifier
10 import replication.helpers
11
12@@ -36,6 +35,7 @@
13
14
15 class DbObject(object):
16+
17 def __init__(
18 self, schema, name, type_, owner, arguments=None, language=None):
19 self.schema = schema
20@@ -50,9 +50,7 @@
21
22 @property
23 def fullname(self):
24- fn = "%s.%s" % (
25- self.schema, self.name
26- )
27+ fn = "%s.%s" % (self.schema, self.name)
28 if self.type == 'function':
29 fn = "%s(%s)" % (fn, self.arguments)
30 return fn
31@@ -67,6 +65,7 @@
32 class DbSchema(dict):
33 groups = None # List of groups defined in the db
34 users = None # List of users defined in the db
35+
36 def __init__(self, con):
37 super(DbSchema, self).__init__()
38 cur = con.cursor()
39@@ -113,8 +112,7 @@
40 """)
41 for schema, name, arguments, owner, language in cur.fetchall():
42 self['%s.%s(%s)' % (schema, name, arguments)] = DbObject(
43- schema, name, 'function', owner, arguments, language
44- )
45+ schema, name, 'function', owner, arguments, language)
46 # Pull a list of groups
47 cur.execute("SELECT groname FROM pg_group")
48 self.groups = [r[0] for r in cur.fetchall()]
49@@ -129,6 +127,7 @@
50
51
52 class CursorWrapper(object):
53+
54 def __init__(self, cursor):
55 self.__dict__['_cursor'] = cursor
56
57@@ -149,7 +148,7 @@
58
59
60 CONFIG_DEFAULTS = {
61- 'groups': ''
62+ 'groups': '',
63 }
64
65
66@@ -192,6 +191,120 @@
67 quote_identifier(identifier) for identifier in identifiers])
68
69
70+class PermissionGatherer:
71+ """Gather permissions for bulk granting or revocation.
72+
73+ Processing such statements in bulk (with multiple users, tables,
74+ or permissions in one statement) is faster than issuing very large
75+ numbers of individual statements.
76+ """
77+
78+ def __init__(self, entity_keyword):
79+ """Gather for SQL entities of one kind (TABLE, FUNCTION, SEQUENCE).
80+
81+ :param entity_keyword: The SQL keyword for the kind of entity
82+ that permissions will be gathered for.
83+ """
84+ self.entity_keyword = entity_keyword
85+ self.permissions = defaultdict(dict)
86+
87+ def add(self, permission, entity, principal, is_group=False):
88+ """Add a permission.
89+
90+ Add all privileges you want to grant or revoke first, then use
91+ `grant` or `revoke` to process them in bulk.
92+
93+ :param permission: A permission: SELECT, INSERT, EXECUTE, etc.
94+ :param entity: Table, function, or sequence on which to grant
95+ or revoke a privilege.
96+ :param principal: User or group to which the privilege should
97+ apply.
98+ :param is_group: Is `principal` a group?
99+ """
100+ if is_group:
101+ full_principal = "GROUP " + principal
102+ else:
103+ full_principal = principal
104+ self.permissions[permission].setdefault(entity, set()).add(
105+ full_principal)
106+
107+ def tabulate(self):
108+ """Group privileges into single-statement work items.
109+
110+ Each entry returned by this method represents a batch of
111+ privileges that can be granted or revoked in a single SQL
112+ statement.
113+
114+ :return: A sequence of tuples of strings: permission(s) to
115+ grant/revoke, entity or entities to act on, and principal(s)
116+ to grant or revoke for. Each is a string.
117+ """
118+ result = []
119+ for permission, parties in self.permissions.iteritems():
120+ for entity, principals in parties.iteritems():
121+ result.append(
122+ (permission, entity, ", ".join(principals)))
123+ return result
124+
125+ def countPermissions(self):
126+ """Count the number of different permissions."""
127+ return len(self.permissions)
128+
129+ def countEntities(self):
130+ """Count the number of different entities."""
131+ return len(set(sum([
132+ entities.keys()
133+ for entities in self.permissions.itervalues()], [])))
134+
135+ def countPrincipals(self):
136+ """Count the number of different principals."""
137+ principals = set()
138+ for entities_and_principals in self.permissions.itervalues():
139+ for extra_principals in entities_and_principals.itervalues():
140+ principals.update(extra_principals)
141+ return len(principals)
142+
143+ def grant(self, cur):
144+ """Grant all gathered permissions.
145+
146+ :param cur: A cursor to operate on.
147+ """
148+ log.debug(
149+ "Granting %d permission(s) on %d %s(s) for %d user(s)/group(s).",
150+ self.countPermissions(),
151+ self.countEntities(),
152+ self.entity_keyword,
153+ self.countPrincipals())
154+ grant_count = 0
155+ for permissions, entities, principals in self.tabulate():
156+ grant = "GRANT %s ON %s %s TO %s" % (
157+ permissions, self.entity_keyword, entities, principals)
158+ log.debug2(grant)
159+ cur.execute(grant)
160+ grant_count += 1
161+ log.debug("Issued %d GRANT statement(s).", grant_count)
162+
163+ def revoke(self, cur):
164+ """Revoke all gathered permissions.
165+
166+ :param cur: A cursor to operate on.
167+ """
168+ log.debug(
169+ "Revoking %d permission(s) on %d %s(s) for %d user(s)/group(s).",
170+ self.countPermissions(),
171+ self.countEntities(),
172+ self.entity_keyword,
173+ self.countPrincipals())
174+ revoke_count = 0
175+ for permissions, entities, principals in self.tabulate():
176+ revoke = "REVOKE %s ON %s %s FROM %s" % (
177+ permissions, self.entity_keyword, entities, principals)
178+ log.debug2(revoke)
179+ cur.execute(revoke)
180+ revoke_count += 1
181+ log.debug("Issued %d REVOKE statement(s).", revoke_count)
182+
183+
184 def reset_permissions(con, config, options):
185 schema = DbSchema(con)
186 all_users = list_identifiers(schema.users)
187@@ -264,8 +377,7 @@
188 continue
189 groups = [
190 g.strip() for g in config.get(user, 'groups', '').split(',')
191- if g.strip()
192- ]
193+ if g.strip()]
194 # Read-Only users get added to Read-Only groups.
195 if user.endswith('_ro'):
196 groups = ['%s_ro' % group for group in groups]
197@@ -287,36 +399,37 @@
198 cur.execute("ALTER TABLE %s OWNER TO %s" % (
199 obj.fullname, quote_identifier(options.owner)))
200
201- # Revoke all privs from known groups. Don't revoke anything for
202- # users or groups not defined in our security.cfg.
203- revocations = defaultdict(list)
204- # Gather all revocations.
205- for section_name in config.sections():
206- for obj in schema.values():
207- if obj.type == 'function':
208- t = 'FUNCTION'
209- else:
210- t = 'TABLE'
211-
212- item = "%s %s" % (t, obj.fullname)
213-
214- roles = [section_name]
215- if section_name != 'public':
216- roles.append(section_name + '_ro')
217-
218- revocations[item] += roles
219-
220- if schema.has_key(obj.seqname):
221- revocations["SEQUENCE %s" % obj.seqname] += roles
222-
223- # Now batch up and execute all revocations.
224 if options.revoke:
225- for item, roles in revocations.iteritems():
226- if roles:
227- log.debug("Revoking permissions on %s", item)
228- cur.execute(
229- "REVOKE ALL ON %s FROM %s"
230- % (item, list_identifiers(roles)))
231+ # Revoke all privs from known groups. Don't revoke anything for
232+ # users or groups not defined in our security.cfg.
233+ table_revocations = PermissionGatherer("TABLE")
234+ function_revocations = PermissionGatherer("FUNCTION")
235+ sequence_revocations = PermissionGatherer("SEQUENCE")
236+
237+ # Gather all revocations.
238+ for section_name in config.sections():
239+ role = quote_identifier(section_name)
240+ if section_name == 'public':
241+ ro_role = None
242+ else:
243+ ro_role = quote_identifier(section_name + "_ro")
244+
245+ for obj in schema.values():
246+ if obj.type == 'function':
247+ gatherer = function_revocations
248+ else:
249+ gatherer = table_revocations
250+
251+ gatherer.add("ALL", obj.fullname, role)
252+
253+ if obj.seqname in schema:
254+ sequence_revocations.add("ALL", obj.seqname, role)
255+ if ro_role is not None:
256+ sequence_revocations.add("ALL", obj.seqname, ro_role)
257+
258+ table_revocations.revoke(cur)
259+ function_revocations.revoke(cur)
260+ sequence_revocations.revoke(cur)
261 else:
262 log.info("Not revoking permissions on database objects")
263
264@@ -327,8 +440,9 @@
265
266 # Set permissions as per config file
267
268- functions = set()
269- tables = set()
270+ table_permissions = PermissionGatherer("TABLE")
271+ function_permissions = PermissionGatherer("FUNCTION")
272+ sequence_permissions = PermissionGatherer("SEQUENCE")
273
274 for username in config.sections():
275 for obj_name, perm in config.items(username):
276@@ -355,73 +469,42 @@
277 log.debug(
278 "Granting %s on %s to %s", perm, obj.fullname, who)
279 if obj.type == 'function':
280- functions.add(obj.fullname)
281- cur.execute(
282- 'GRANT %s ON FUNCTION %s TO %s'
283- % (perm, obj.fullname, who))
284- cur.execute(
285- 'GRANT EXECUTE ON FUNCTION %s TO GROUP %s'
286- % (obj.fullname, who_ro))
287+ function_permissions.add(perm, obj.fullname, who)
288+ function_permissions.add("EXECUTE", obj.fullname, who_ro)
289+ function_permissions.add(
290+ "EXECUTE", obj.fullname, "read", is_group=True)
291+ function_permissions.add(
292+ "ALL", obj.fullname, "admin", is_group=True)
293 else:
294- tables.add(obj.fullname)
295- cur.execute(
296- 'GRANT %s ON TABLE %s TO %s'
297- % (perm, obj.fullname, who))
298- cur.execute(
299- 'GRANT SELECT ON TABLE %s TO %s'
300- % (obj.fullname, who_ro))
301- if schema.has_key(obj.seqname):
302+ table_permissions.add(
303+ "ALL", obj.fullname, "admin", is_group=True)
304+ table_permissions.add(perm, obj.fullname, who)
305+ table_permissions.add("SELECT", obj.fullname, who_ro)
306+ is_secure = (obj.fullname in SECURE_TABLES)
307+ if not is_secure:
308+ table_permissions.add(
309+ "SELECT", obj.fullname, "read", is_group=True)
310+ if obj.seqname in schema:
311 if 'INSERT' in perm:
312 seqperm = 'USAGE'
313 elif 'SELECT' in perm:
314 seqperm = 'SELECT'
315- log.debug(
316- "Granting %s on %s to %s", seqperm, obj.seqname, who)
317- cur.execute(
318- 'GRANT %s ON %s TO %s'
319- % (seqperm, obj.seqname, who))
320- if obj.fullname not in SECURE_TABLES:
321- cur.execute(
322- 'GRANT SELECT ON %s TO GROUP read'
323- % obj.seqname)
324- cur.execute(
325- 'GRANT ALL ON %s TO GROUP admin'
326- % obj.seqname)
327- cur.execute(
328- 'GRANT SELECT ON %s TO %s'
329- % (obj.seqname, who_ro))
330+ sequence_permissions.add(seqperm, obj.seqname, who)
331+ if not is_secure:
332+ sequence_permissions.add(
333+ "SELECT", obj.seqname, "read", is_group=True)
334+ sequence_permissions.add("SELECT", obj.seqname, who_ro)
335+ sequence_permissions.add(
336+ "ALL", obj.seqname, "admin", is_group=True)
337
338- # A few groups get special rights to every function or table. Batch
339- # the schema manipulations to save time.
340- log.debug(
341- "Granting permissions to %d functions to magic roles",
342- len(functions))
343- if functions:
344- functions_text = ', '.join(functions)
345- cur.execute(
346- "GRANT EXECUTE ON FUNCTION %s TO GROUP read" % functions_text)
347- cur.execute(
348- "GRANT ALL ON FUNCTION %s TO GROUP admin" % functions_text)
349- log.debug(
350- "Granting permissions to %d tables to admin role",
351- len(tables))
352- if tables:
353- tables_text = ', '.join(tables)
354- cur.execute("GRANT ALL ON TABLE %s TO GROUP admin" % tables_text)
355- nonsecure_tables = tables - set(SECURE_TABLES)
356- log.debug(
357- "Granting permissions to %d nonsecure tables to read role",
358- len(nonsecure_tables))
359- if nonsecure_tables:
360- nonsecure_tables_text = ', '.join(nonsecure_tables)
361- cur.execute(
362- "GRANT SELECT ON TABLE %s TO GROUP read" % nonsecure_tables_text)
363+ function_permissions.grant(cur)
364+ table_permissions.grant(cur)
365+ sequence_permissions.grant(cur)
366
367 # Set permissions on public schemas
368 public_schemas = [
369- s.strip() for s in config.get('DEFAULT','public_schemas').split(',')
370- if s.strip()
371- ]
372+ s.strip() for s in config.get('DEFAULT', 'public_schemas').split(',')
373+ if s.strip()]
374 log.debug("Granting access to %d public schemas", len(public_schemas))
375 for schema_name in public_schemas:
376 cur.execute("GRANT USAGE ON SCHEMA %s TO PUBLIC" % (
377@@ -444,7 +527,7 @@
378 if obj not in found:
379 forgotten.add(obj)
380 forgotten = [obj.fullname for obj in forgotten
381- if obj.type in ['table','function','view']]
382+ if obj.type in ['table', 'function', 'view']]
383 if forgotten:
384 log.warn('No permissions specified for %r', forgotten)
385