Merge lp:~stub/launchpad/sanitize-db into lp:launchpad
- sanitize-db
- Merge into devel
Proposed by
Stuart Bishop
Status: | Merged |
---|---|
Approved by: | Stuart Bishop |
Approved revision: | no longer in the source branch. |
Merged at revision: | 13201 |
Proposed branch: | lp:~stub/launchpad/sanitize-db |
Merge into: | lp:launchpad |
Diff against target: |
715 lines (+0/-701) 3 files modified
lib/lp/scripts/utilities/sanitizedb.py (+0/-597) lib/lp/scripts/utilities/tests/test_sanitizedb.py (+0/-89) utilities/sanitize-db.py (+0/-15) |
To merge this branch: | bzr merge lp:~stub/launchpad/sanitize-db |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Stuart Bishop (community) | Approve | ||
Review via email: mp+64166@code.launchpad.net |
Commit message
[r=stub][no-qa] Remove sanitizedb
Description of the change
= Summary =
sanitizedb is hard to maintain, doesn't meet any genuine use cases and takes days and huge resources to not meet them.
== Proposed fix ==
Nuke it from orbit.
== Pre-implementation notes ==
== Implementation details ==
== Tests ==
== Demo and Q/A ==
(No files to lint)
To post a comment you must log in.
Revision history for this message
Stuart Bishop (stub) : | # |
review:
Approve
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === removed file 'lib/lp/scripts/utilities/sanitizedb.py' |
2 | --- lib/lp/scripts/utilities/sanitizedb.py 2011-05-27 19:53:20 +0000 |
3 | +++ lib/lp/scripts/utilities/sanitizedb.py 1970-01-01 00:00:00 +0000 |
4 | @@ -1,597 +0,0 @@ |
5 | -# Copyright 2009 Canonical Ltd. This software is licensed under the |
6 | -# GNU Affero General Public License version 3 (see the file LICENSE). |
7 | - |
8 | -"""Scrub a Launchpad database of private data.""" |
9 | - |
10 | -import _pythonpath |
11 | - |
12 | - |
13 | -__metaclass__ = type |
14 | -__all__ = [] |
15 | - |
16 | -import re |
17 | -import subprocess |
18 | -import sys |
19 | - |
20 | -from storm.expr import ( |
21 | - Join, |
22 | - Or, |
23 | - ) |
24 | -import transaction |
25 | -from zope.component import getUtility |
26 | - |
27 | -from canonical.database.constants import UTC_NOW |
28 | -from canonical.database.sqlbase import cursor, sqlvalues |
29 | -from canonical.database.postgresql import ConnectionString, listReferences |
30 | -from canonical.launchpad.scripts.logger import DEBUG2, DEBUG3 |
31 | -from canonical.launchpad.webapp.interfaces import ( |
32 | - IStoreSelector, MAIN_STORE, MASTER_FLAVOR) |
33 | -from canonical.lp import initZopeless |
34 | -from lp.services.scripts.base import LaunchpadScript |
35 | - |
36 | - |
37 | -class SanitizeDb(LaunchpadScript): |
38 | - usage = "%prog [options] pg_connection_string" |
39 | - description = "Destroy private information in a Launchpad database." |
40 | - |
41 | - def add_my_options(self): |
42 | - self.parser.add_option( |
43 | - "-f", "--force", action="store_true", default=False, |
44 | - help="Force running against a possible production database.") |
45 | - self.parser.add_option( |
46 | - "-n", "--dry-run", action="store_true", default=False, |
47 | - help="Don't commit changes.") |
48 | - |
49 | - def _init_db(self, isolation): |
50 | - if len(self.args) == 0: |
51 | - self.parser.error("PostgreSQL connection string required.") |
52 | - elif len(self.args) > 1: |
53 | - self.parser.error("Too many arguments.") |
54 | - |
55 | - self.pg_connection_string = ConnectionString(self.args[0]) |
56 | - |
57 | - if ('prod' in str(self.pg_connection_string) |
58 | - and not self.options.force): |
59 | - self.parser.error( |
60 | - "Attempting to sanitize a potential production database '%s'. " |
61 | - "--force required." % self.pg_connection_string.dbname) |
62 | - |
63 | - self.logger.debug("Connect using '%s'." % self.pg_connection_string) |
64 | - |
65 | - self.txn = initZopeless( |
66 | - dbname=self.pg_connection_string.dbname, |
67 | - dbhost=self.pg_connection_string.host, |
68 | - dbuser=self.pg_connection_string.user, |
69 | - isolation=isolation) |
70 | - |
71 | - self.store = getUtility(IStoreSelector).get(MAIN_STORE, MASTER_FLAVOR) |
72 | - |
73 | - def main(self): |
74 | - self.allForeignKeysCascade() |
75 | - triggers_to_disable = [ |
76 | - ('bugmessage', 'set_bug_message_count_t'), |
77 | - ('bugmessage', 'set_date_last_message_t'), |
78 | - ] |
79 | - self.disableTriggers(triggers_to_disable) |
80 | - |
81 | - tables_to_empty = [ |
82 | - 'accountpassword', |
83 | - 'archiveauthtoken', |
84 | - 'archivesubscriber', |
85 | - 'authtoken', |
86 | - 'buildqueue', |
87 | - 'commercialsubscription', |
88 | - 'entitlement', |
89 | - 'job', |
90 | - 'logintoken', |
91 | - 'mailinglistban', |
92 | - 'mailinglistsubscription', |
93 | - 'oauthaccesstoken', |
94 | - 'oauthconsumer', |
95 | - 'oauthnonce', |
96 | - 'oauthrequesttoken', |
97 | - 'openidassociation', |
98 | - 'openidconsumerassociation', |
99 | - 'openidconsumernonce', |
100 | - 'openidrpsummary', |
101 | - 'openididentifier', |
102 | - 'requestedcds', |
103 | - 'scriptactivity', |
104 | - 'shipitreport', |
105 | - 'shipitsurvey', |
106 | - 'shipitsurveyanswer', |
107 | - 'shipitsurveyquestion', |
108 | - 'shipitsurveyresult', |
109 | - 'shipment', |
110 | - 'shippingrequest', |
111 | - 'shippingrun', |
112 | - 'sprintattendance', # Is this private? |
113 | - 'standardshipitrequest', |
114 | - 'temporaryblobstorage', |
115 | - 'usertouseremail', |
116 | - 'vote', |
117 | - 'votecast', |
118 | - 'webserviceban', |
119 | - ] |
120 | - for table in tables_to_empty: |
121 | - self.removeTableRows(table) |
122 | - |
123 | - self.removePrivatePeople() |
124 | - self.removePrivateTeams() |
125 | - self.removePrivateBugs() |
126 | - self.removePrivateBugMessages() |
127 | - self.removePrivateBranches() |
128 | - self.removePrivateHwSubmissions() |
129 | - self.removePrivateSpecifications() |
130 | - self.removePrivateLocations() |
131 | - self.removePrivateArchives() |
132 | - self.removePrivateAnnouncements() |
133 | - self.removePrivateLibrarianFiles() |
134 | - self.removeInactiveProjects() |
135 | - self.removeInactiveProducts() |
136 | - self.removeInvalidEmailAddresses() |
137 | - self.removePPAArchivePermissions() |
138 | - self.scrambleHiddenEmailAddresses() |
139 | - |
140 | - self.removeDeactivatedPeopleAndAccounts() |
141 | - |
142 | - # Remove unlinked records. These might contain private data. |
143 | - self.removeUnlinkedEmailAddresses() |
144 | - self.removeUnlinkedAccounts() |
145 | - self.removeUnlinked('revision', [ |
146 | - ('revisioncache', 'revision'), |
147 | - ('revisionparent', 'revision'), |
148 | - ('revisionproperty', 'revision'), |
149 | - ]) |
150 | - self.removeUnlinked('libraryfilealias', [ |
151 | - ('libraryfiledownloadcount', 'libraryfilealias')]) |
152 | - self.removeUnlinked('libraryfilecontent') |
153 | - self.removeUnlinked('message', [('messagechunk', 'message')]) |
154 | - self.removeUnlinked('staticdiff') |
155 | - self.removeUnlinked('previewdiff') |
156 | - self.removeUnlinked('diff') |
157 | - |
158 | - # Scrub data after removing all the records we are going to. |
159 | - # No point scrubbing data that is going to get removed later. |
160 | - columns_to_scrub = [ |
161 | - ('account', ['status_comment']), |
162 | - ('distribution', ['reviewer_whiteboard']), |
163 | - ('distributionmirror', ['whiteboard']), |
164 | - ('hwsubmission', ['raw_emailaddress']), |
165 | - ('nameblacklist', ['comment']), |
166 | - ('person', [ |
167 | - 'personal_standing_reason', |
168 | - 'mail_resumption_date']), |
169 | - ('product', ['reviewer_whiteboard']), |
170 | - ('project', ['reviewer_whiteboard']), |
171 | - ('revisionauthor', ['email']), |
172 | - ('signedcodeofconduct', ['admincomment']), |
173 | - ] |
174 | - for table, column in columns_to_scrub: |
175 | - self.scrubColumn(table, column) |
176 | - |
177 | - self.enableTriggers(triggers_to_disable) |
178 | - self.repairData() |
179 | - |
180 | - self.resetForeignKeysCascade() |
181 | - if self.options.dry_run: |
182 | - self.logger.info("Dry run - rolling back.") |
183 | - transaction.abort() |
184 | - else: |
185 | - self.logger.info("Committing.") |
186 | - transaction.commit() |
187 | - |
188 | - def removeDeactivatedPeopleAndAccounts(self): |
189 | - """Remove all suspended and deactivated people & their accounts. |
190 | - |
191 | - Launchpad celebrities are ignored. |
192 | - """ |
193 | - from canonical.launchpad.database.account import Account |
194 | - from canonical.launchpad.database.emailaddress import EmailAddress |
195 | - from canonical.launchpad.interfaces.account import AccountStatus |
196 | - from lp.app.interfaces.launchpad import ILaunchpadCelebrities |
197 | - from lp.registry.model.person import Person |
198 | - celebrities = getUtility(ILaunchpadCelebrities) |
199 | - # This is a slow operation due to the huge amount of cascading. |
200 | - # We remove one row at a time for better reporting and PostgreSQL |
201 | - # memory use. |
202 | - deactivated_people = self.store.find( |
203 | - Person, |
204 | - Person.account == Account.id, |
205 | - Account.status != AccountStatus.ACTIVE) |
206 | - total_deactivated_count = deactivated_people.count() |
207 | - deactivated_count = 0 |
208 | - for person in deactivated_people: |
209 | - # Ignore celebrities |
210 | - if celebrities.isCelebrityPerson(person.name): |
211 | - continue |
212 | - deactivated_count += 1 |
213 | - self.logger.debug( |
214 | - "Removing %d of %d deactivated people (%s)", |
215 | - deactivated_count, total_deactivated_count, person.name) |
216 | - # Clean out the EmailAddress and Account for this person |
217 | - # while we are here, making subsequent unbatched steps |
218 | - # faster. These don't cascade due to the lack of a foreign |
219 | - # key constraint between Person and EmailAddress, and the |
220 | - # ON DELETE SET NULL foreign key constraint between |
221 | - # EmailAddress and Account. |
222 | - self.store.find( |
223 | - EmailAddress, EmailAddress.person == person).remove() |
224 | - self.store.find(Account, Account.id == person.accountID).remove() |
225 | - self.store.remove(person) |
226 | - self.store.flush() |
227 | - self.logger.info( |
228 | - "Removed %d suspended or deactivated people + email + accounts", |
229 | - deactivated_count) |
230 | - |
231 | - def removePrivatePeople(self): |
232 | - """Remove all private people.""" |
233 | - from lp.registry.interfaces.person import PersonVisibility |
234 | - from lp.registry.model.person import Person |
235 | - count = self.store.find( |
236 | - Person, |
237 | - Person.teamowner == None, |
238 | - Person.visibility != PersonVisibility.PUBLIC).remove() |
239 | - self.store.flush() |
240 | - self.logger.info("Removed %d private people.", count) |
241 | - |
242 | - def removePrivateTeams(self): |
243 | - """Remove all private people.""" |
244 | - from lp.registry.interfaces.person import PersonVisibility |
245 | - from lp.registry.model.person import Person |
246 | - count = self.store.find( |
247 | - Person, |
248 | - Person.teamowner != None, |
249 | - Person.visibility != PersonVisibility.PUBLIC).remove() |
250 | - self.store.flush() |
251 | - self.logger.info("Removed %d private teams.", count) |
252 | - |
253 | - def removePrivateBugs(self): |
254 | - """Remove all private bugs.""" |
255 | - from lp.bugs.model.bug import Bug |
256 | - count = self.store.find(Bug, Bug.private == True).remove() |
257 | - self.store.flush() |
258 | - self.logger.info("Removed %d private bugs.", count) |
259 | - |
260 | - def removePrivateBugMessages(self): |
261 | - """Remove all hidden bug messages.""" |
262 | - from lp.bugs.model.bugmessage import BugMessage |
263 | - from lp.services.messages.model.message import Message |
264 | - message_ids = list(self.store.using(*[ |
265 | - BugMessage, |
266 | - Join(Message, BugMessage.messageID == Message.id), |
267 | - ]).find(BugMessage.id, Message.visible == False)) |
268 | - self.store.flush() |
269 | - count = self.store.find( |
270 | - BugMessage, BugMessage.id.is_in(message_ids)).remove() |
271 | - self.store.flush() |
272 | - self.logger.info("Removed %d private bug messages.", count) |
273 | - |
274 | - def removePrivateBranches(self): |
275 | - """Remove all private branches.""" |
276 | - from lp.code.model.branch import Branch |
277 | - count = self.store.find(Branch, Branch.private == True).remove() |
278 | - self.store.flush() |
279 | - self.logger.info("Removed %d private branches.", count) |
280 | - |
281 | - def removePrivateHwSubmissions(self): |
282 | - """Remove all private hardware submissions.""" |
283 | - from lp.hardwaredb.model.hwdb import HWSubmission |
284 | - count = self.store.find( |
285 | - HWSubmission, HWSubmission.private == True).remove() |
286 | - self.store.flush() |
287 | - self.logger.info("Removed %d private hardware submissions.", count) |
288 | - |
289 | - def removePrivateSpecifications(self): |
290 | - """Remove all private specifications.""" |
291 | - from lp.blueprints.model.specification import Specification |
292 | - count = self.store.find( |
293 | - Specification, Specification.private == True).remove() |
294 | - self.store.flush() |
295 | - self.logger.info("Removed %d private specifications.", count) |
296 | - |
297 | - def removePrivateLocations(self): |
298 | - """Remove private person locations.""" |
299 | - from lp.registry.model.personlocation import PersonLocation |
300 | - count = self.store.find( |
301 | - PersonLocation, PersonLocation.visible == False).remove() |
302 | - self.store.flush() |
303 | - self.logger.info("Removed %d person locations.", count) |
304 | - |
305 | - def removePrivateArchives(self): |
306 | - """Remove private archives. |
307 | - |
308 | - This might over delete, but lets be conservative for now. |
309 | - """ |
310 | - from lp.soyuz.model.archive import Archive |
311 | - count = self.store.find(Archive, Archive.private == True).remove() |
312 | - self.store.flush() |
313 | - self.logger.info( |
314 | - "Removed %d private archives.", count) |
315 | - |
316 | - def removePrivateAnnouncements(self): |
317 | - """Remove announcements that have not yet been published.""" |
318 | - from lp.registry.model.announcement import Announcement |
319 | - count = self.store.find( |
320 | - Announcement, Or( |
321 | - Announcement.date_announced == None, |
322 | - Announcement.date_announced > UTC_NOW, |
323 | - Announcement.active == False)).remove() |
324 | - self.store.flush() |
325 | - self.logger.info( |
326 | - "Removed %d unpublished announcements.", count) |
327 | - |
328 | - def removePrivateLibrarianFiles(self): |
329 | - """Remove librarian files only available via the restricted librarian. |
330 | - """ |
331 | - from canonical.launchpad.database.librarian import LibraryFileAlias |
332 | - count = self.store.find( |
333 | - LibraryFileAlias, LibraryFileAlias.restricted == True).remove() |
334 | - self.store.flush() |
335 | - self.logger.info("Removed %d restricted librarian files.", count) |
336 | - |
337 | - def removeInactiveProjects(self): |
338 | - """Remove inactive projects.""" |
339 | - from lp.registry.model.projectgroup import ProjectGroup |
340 | - count = self.store.find( |
341 | - ProjectGroup, ProjectGroup.active == False).remove() |
342 | - self.store.flush() |
343 | - self.logger.info("Removed %d inactive product groups.", count) |
344 | - |
345 | - def removeInactiveProducts(self): |
346 | - """Remove inactive products.""" |
347 | - from lp.registry.model.product import Product |
348 | - count = self.store.find( |
349 | - Product, Product.active == False).remove() |
350 | - self.store.flush() |
351 | - self.logger.info("Removed %d inactive products.", count) |
352 | - |
353 | - def removeTableRows(self, table): |
354 | - """Remove all data from a table.""" |
355 | - count = self.store.execute("DELETE FROM %s" % table).rowcount |
356 | - self.store.execute("ANALYZE %s" % table) |
357 | - self.logger.info("Removed %d %s rows (all).", count, table) |
358 | - |
359 | - def removeUnlinked(self, table, ignores=()): |
360 | - """Remove all unlinked entries in the table. |
361 | - |
362 | - References from the ignores list are ignored. |
363 | - |
364 | - :param table: table name. |
365 | - |
366 | - :param ignores: list of (table, column) references to ignore. |
367 | - """ |
368 | - references = [] |
369 | - for result in listReferences(cursor(), table, 'id'): |
370 | - (from_table, from_column, to_table, |
371 | - to_column, update, delete) = result |
372 | - if (to_table == table and to_column == 'id' |
373 | - and (from_table, from_column) not in ignores): |
374 | - references.append( |
375 | - "EXCEPT SELECT %s FROM %s" % (from_column, from_table)) |
376 | - query = ( |
377 | - "DELETE FROM %s USING (SELECT id FROM %s %s) AS Unreferenced " |
378 | - "WHERE %s.id = Unreferenced.id" |
379 | - % (table, table, ' '.join(references), table)) |
380 | - self.logger.log(DEBUG2, query) |
381 | - count = self.store.execute(query).rowcount |
382 | - self.logger.info("Removed %d unlinked %s rows.", count, table) |
383 | - |
384 | - def removeInvalidEmailAddresses(self): |
385 | - """Remove all invalid and old email addresses.""" |
386 | - from canonical.launchpad.database.emailaddress import EmailAddress |
387 | - from canonical.launchpad.interfaces.emailaddress import ( |
388 | - EmailAddressStatus) |
389 | - count = self.store.find( |
390 | - EmailAddress, Or( |
391 | - EmailAddress.status == EmailAddressStatus.NEW, |
392 | - EmailAddress.status == EmailAddressStatus.OLD, |
393 | - EmailAddress.email.lower().like( |
394 | - u'%@example.com', case_sensitive=True))).remove() |
395 | - self.store.flush() |
396 | - self.logger.info( |
397 | - "Removed %d invalid, unvalidated and old email addresses.", count) |
398 | - |
399 | - def removePPAArchivePermissions(self): |
400 | - """Remove ArchivePermission records for PPAs.""" |
401 | - from lp.soyuz.enums import ArchivePurpose |
402 | - count = self.store.execute(""" |
403 | - DELETE FROM ArchivePermission |
404 | - USING Archive |
405 | - WHERE ArchivePermission.archive = Archive.id |
406 | - AND Archive.purpose = %s |
407 | - """ % sqlvalues(ArchivePurpose.PPA)).rowcount |
408 | - self.logger.info( |
409 | - "Removed %d ArchivePermission records linked to PPAs.", count) |
410 | - |
411 | - def scrambleHiddenEmailAddresses(self): |
412 | - """Hide email addresses users have requested to not be public. |
413 | - |
414 | - Call after removeInvalidEmailAddresses to avoid any possible |
415 | - name clashes. |
416 | - |
417 | - This replaces the email addresses of all people with |
418 | - hide_email_addresses set with an @example.com email address. |
419 | - """ |
420 | - # One day there might be Storm documentation telling me how to |
421 | - # do this via the ORM. |
422 | - count = self.store.execute(""" |
423 | - UPDATE EmailAddress |
424 | - SET email='e' || text(EmailAddress.id) || '@example.com' |
425 | - FROM Person |
426 | - WHERE EmailAddress.person = Person.id |
427 | - AND Person.hide_email_addresses IS TRUE |
428 | - """).rowcount |
429 | - self.logger.info( |
430 | - "Replaced %d hidden email addresses with @example.com", count) |
431 | - |
432 | - def removeUnlinkedEmailAddresses(self): |
433 | - """Remove EmailAddresses not linked to a Person. |
434 | - |
435 | - We call this before removeUnlinkedAccounts to avoid the |
436 | - ON DELETE SET NULL overhead from the EmailAddress -> Account |
437 | - foreign key constraint. |
438 | - """ |
439 | - from canonical.launchpad.database.emailaddress import EmailAddress |
440 | - count = self.store.find( |
441 | - EmailAddress, EmailAddress.person == None).remove() |
442 | - self.store.flush() |
443 | - self.logger.info( |
444 | - "Removed %d email addresses not linked to people.", count) |
445 | - |
446 | - def removeUnlinkedAccounts(self): |
447 | - """Remove Accounts not linked to a Person.""" |
448 | - from canonical.launchpad.database.account import Account |
449 | - from lp.registry.model.person import Person |
450 | - all_accounts = self.store.find(Account) |
451 | - linked_accounts = self.store.find( |
452 | - Account, Account.id == Person.accountID) |
453 | - unlinked_accounts = all_accounts.difference(linked_accounts) |
454 | - total_unlinked_accounts = unlinked_accounts.count() |
455 | - count = 0 |
456 | - for account in unlinked_accounts: |
457 | - self.store.remove(account) |
458 | - self.store.flush() |
459 | - count += 1 |
460 | - self.logger.debug( |
461 | - "Removed %d of %d unlinked accounts." |
462 | - % (count, total_unlinked_accounts)) |
463 | - self.logger.info("Removed %d accounts not linked to a person", count) |
464 | - |
465 | - def scrubColumn(self, table, columns): |
466 | - """Remove production admin related notes.""" |
467 | - query = ["UPDATE %s SET" % table] |
468 | - for column in columns: |
469 | - query.append("%s = NULL" % column) |
470 | - query.append(",") |
471 | - query.pop() |
472 | - query.append("WHERE") |
473 | - for column in columns: |
474 | - query.append("%s IS NOT NULL" % column) |
475 | - query.append("OR") |
476 | - query.pop() |
477 | - self.logger.log(DEBUG3, ' '.join(query)) |
478 | - count = self.store.execute(' '.join(query)).rowcount |
479 | - self.logger.info( |
480 | - "Scrubbed %d %s.{%s} entries." |
481 | - % (count, table, ','.join(columns))) |
482 | - |
483 | - def allForeignKeysCascade(self): |
484 | - """Set all foreign key constraints to ON DELETE CASCADE. |
485 | - |
486 | - The current state is recorded first so resetForeignKeysCascade |
487 | - can repair the changes. |
488 | - |
489 | - Only tables in the public schema are modified. |
490 | - """ |
491 | - # Get the SQL needed to create the foreign key constraints. |
492 | - # pg_dump seems the only sane way of getting this. We could |
493 | - # generate the SQL ourselves using the pg_constraints table, |
494 | - # but that can change between PostgreSQL releases. |
495 | - # Ideally we could use ALTER CONSTRAINT, but that doesn't exist. |
496 | - # Or modify pg_constraints, but that doesn't work. |
497 | - cmd = [ |
498 | - 'pg_dump', '--no-privileges', '--no-owner', '--schema-only', |
499 | - '--schema=public'] |
500 | - cmd.extend( |
501 | - self.pg_connection_string.asPGCommandLineArgs().split(' ')) |
502 | - self.logger.debug("Running %s", ' '.join(cmd)) |
503 | - pg_dump = subprocess.Popen( |
504 | - cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT, |
505 | - stdin=subprocess.PIPE) |
506 | - (pg_dump_out, pg_dump_err) = pg_dump.communicate() |
507 | - if pg_dump.returncode != 0: |
508 | - self.fail("pg_dump returned %d" % pg_dump.returncode) |
509 | - |
510 | - cascade_sql = [] |
511 | - restore_sql = [] |
512 | - pattern = r""" |
513 | - (?x) ALTER \s+ TABLE \s+ ONLY \s+ (".*?"|\w+?) \s+ |
514 | - ADD \s+ CONSTRAINT \s+ (".*?"|\w+?) \s+ FOREIGN \s+ KEY [^;]+; |
515 | - """ |
516 | - for match in re.finditer(pattern, pg_dump_out): |
517 | - table = match.group(1) |
518 | - constraint = match.group(2) |
519 | - |
520 | - sql = match.group(0) |
521 | - |
522 | - # Drop the existing constraint so we can recreate it. |
523 | - drop_sql = 'ALTER TABLE %s DROP CONSTRAINT %s;' % ( |
524 | - table, constraint) |
525 | - restore_sql.append(drop_sql) |
526 | - cascade_sql.append(drop_sql) |
527 | - |
528 | - # Store the SQL needed to restore the constraint. |
529 | - restore_sql.append(sql) |
530 | - |
531 | - # Recreate the constraint as ON DELETE CASCADE |
532 | - sql = re.sub(r"""(?xs)^ |
533 | - (.*?) |
534 | - (?:ON \s+ DELETE \s+ (?:NO\s+|SET\s+)?\w+)? \s* |
535 | - ((?:NOT\s+)? DEFERRABLE|) \s* |
536 | - (INITIALLY\s+(?:DEFERRED|IMMEDIATE)|) \s*; |
537 | - """, r"\1 ON DELETE CASCADE \2 \3;", sql) |
538 | - cascade_sql.append(sql) |
539 | - |
540 | - # Set all the foreign key constraints to ON DELETE CASCADE, really. |
541 | - self.logger.info( |
542 | - "Setting %d constraints to ON DELETE CASCADE", |
543 | - len(cascade_sql) / 2) |
544 | - for statement in cascade_sql: |
545 | - self.logger.log(DEBUG3, statement) |
546 | - self.store.execute(statement) |
547 | - |
548 | - # Store the recovery SQL. |
549 | - self._reset_foreign_key_sql = restore_sql |
550 | - |
551 | - def resetForeignKeysCascade(self): |
552 | - """Reset the foreign key constraints' ON DELETE mode.""" |
553 | - self.logger.info( |
554 | - "Resetting %d foreign key constraints to initial state.", |
555 | - len(self._reset_foreign_key_sql)/2) |
556 | - for statement in self._reset_foreign_key_sql: |
557 | - self.store.execute(statement) |
558 | - |
559 | - def disableTriggers(self, triggers_to_disable): |
560 | - """Disable a set of triggers. |
561 | - |
562 | - :param triggers_to_disable: List of (table_name, trigger_name). |
563 | - """ |
564 | - self.logger.debug("Disabling %d triggers." % len(triggers_to_disable)) |
565 | - for table_name, trigger_name in triggers_to_disable: |
566 | - self.logger.debug( |
567 | - "Disabling trigger %s.%s." % (table_name, trigger_name)) |
568 | - self.store.execute( |
569 | - "ALTER TABLE %s DISABLE TRIGGER %s" |
570 | - % (table_name, trigger_name)) |
571 | - |
572 | - def enableTriggers(self, triggers_to_enable): |
573 | - """Renable a set of triggers. |
574 | - |
575 | - :param triggers_to_enable: List of (table_name, trigger_name). |
576 | - """ |
577 | - self.logger.debug("Enabling %d triggers." % len(triggers_to_enable)) |
578 | - for table_name, trigger_name in triggers_to_enable: |
579 | - self.logger.debug( |
580 | - "Enabling trigger %s.%s." % (table_name, trigger_name)) |
581 | - self.store.execute( |
582 | - "ALTER TABLE %s ENABLE TRIGGER %s" |
583 | - % (table_name, trigger_name)) |
584 | - |
585 | - def repairData(self): |
586 | - """After scrubbing, repair any data possibly damaged in the process. |
587 | - """ |
588 | - # Repair Bug.message_count and Bug.date_last_message. |
589 | - # The triggers where disabled while we where doing the cascading |
590 | - # deletes because they fail (attempting to change a mutating table). |
591 | - # We can repair these caches by forcing the triggers to run for |
592 | - # every row. |
593 | - self.store.execute(""" |
594 | - UPDATE Message SET visible=visible |
595 | - FROM BugMessage |
596 | - WHERE BugMessage.message = Message.id |
597 | - """) |
598 | - |
599 | - def _fail(self, error_message): |
600 | - self.logger.fatal(error_message) |
601 | - sys.exit(1) |
602 | |
603 | === removed file 'lib/lp/scripts/utilities/tests/test_sanitizedb.py' |
604 | --- lib/lp/scripts/utilities/tests/test_sanitizedb.py 2010-08-20 20:31:18 +0000 |
605 | +++ lib/lp/scripts/utilities/tests/test_sanitizedb.py 1970-01-01 00:00:00 +0000 |
606 | @@ -1,89 +0,0 @@ |
607 | -# Copyright 2009 Canonical Ltd. This software is licensed under the |
608 | -# GNU Affero General Public License version 3 (see the file LICENSE). |
609 | - |
610 | -"""Test the sanitize database script.""" |
611 | - |
612 | -__metaclass__ = type |
613 | - |
614 | -import os.path |
615 | -import subprocess |
616 | -import unittest |
617 | - |
618 | -from canonical.config import ( |
619 | - config, |
620 | - dbconfig, |
621 | - ) |
622 | -from canonical.database.sqlbase import ( |
623 | - connect, |
624 | - sqlvalues, |
625 | - ) |
626 | -from canonical.testing.layers import DatabaseLayer |
627 | -from lp.registry.interfaces.person import PersonVisibility |
628 | -from lp.testing import TestCase |
629 | - |
630 | - |
631 | -class SanitizeDbScriptTestCase(TestCase): |
632 | - layer = DatabaseLayer |
633 | - |
634 | - def setUp(self): |
635 | - super(SanitizeDbScriptTestCase, self).setUp() |
636 | - self.script_path = os.path.join( |
637 | - config.root, 'utilities', 'sanitize-db.py') |
638 | - DatabaseLayer.force_dirty_database() |
639 | - |
640 | - def containsPrivateInformation(self): |
641 | - # Return True if we detect known private information in the |
642 | - # database. |
643 | - con = connect('launchpad') |
644 | - try: |
645 | - cur = con.cursor() |
646 | - cur.execute( |
647 | - "SELECT TRUE FROM Person WHERE visibility <> %s LIMIT 1" |
648 | - % sqlvalues(PersonVisibility.PUBLIC)) |
649 | - if cur.fetchone() is None: |
650 | - return False |
651 | - else: |
652 | - return True |
653 | - finally: |
654 | - # Don't leave the connection dangling or it could block |
655 | - # the script. |
656 | - con.close() |
657 | - |
658 | - def runScript(self, *args): |
659 | - cmd = [self.script_path, dbconfig.main_master + ' user=postgres'] |
660 | - cmd.extend(args) |
661 | - process = subprocess.Popen( |
662 | - cmd, stdin=subprocess.PIPE, |
663 | - stdout=subprocess.PIPE, stderr=subprocess.PIPE) |
664 | - (out, err) = process.communicate() |
665 | - self.assertEqual( |
666 | - process.returncode, 0, |
667 | - "%s failed: %d\n%s" % (' '.join(cmd), process.returncode, err)) |
668 | - return (out, err) |
669 | - |
670 | - def test_script(self): |
671 | - # Run the sanitize-db.py script and confirm it actually |
672 | - # changes things. |
673 | - self.assertTrue( |
674 | - self.containsPrivateInformation(), |
675 | - 'No private information detected.') |
676 | - self.runScript() |
677 | - self.assertFalse( |
678 | - self.containsPrivateInformation(), |
679 | - 'Private information not removed.') |
680 | - |
681 | - def test_script_dryrun(self): |
682 | - # Run the sanitize-db.py script in --dry-run mode and |
683 | - # confirm it doesn't change things. |
684 | - self.assertTrue( |
685 | - self.containsPrivateInformation(), |
686 | - 'No private information detected.') |
687 | - self.runScript('--dry-run') |
688 | - self.assertTrue( |
689 | - self.containsPrivateInformation(), |
690 | - 'Private information removed.') |
691 | - |
692 | - |
693 | -def test_suite(): |
694 | - return unittest.TestLoader().loadTestsFromName(__name__) |
695 | - |
696 | |
697 | === removed file 'utilities/sanitize-db.py' |
698 | --- utilities/sanitize-db.py 2010-04-27 19:48:39 +0000 |
699 | +++ utilities/sanitize-db.py 1970-01-01 00:00:00 +0000 |
700 | @@ -1,15 +0,0 @@ |
701 | -#!/usr/bin/python -S |
702 | -# Copyright 2009 Canonical Ltd. This software is licensed under the |
703 | -# GNU Affero General Public License version 3 (see the file LICENSE). |
704 | - |
705 | -"""Scrub a Launchpad database of private data.""" |
706 | - |
707 | -__metaclass__ = type |
708 | - |
709 | -# pylint: disable-msg=W0403 |
710 | -import _pythonpath |
711 | - |
712 | -from lp.scripts.utilities.sanitizedb import SanitizeDb |
713 | - |
714 | -if __name__ == '__main__': |
715 | - SanitizeDb().lock_and_run() |