Merge lp:~stub/launchpad/sanitize-db into lp:launchpad

Proposed by Stuart Bishop
Status: Merged
Approved by: Stuart Bishop
Approved revision: no longer in the source branch.
Merged at revision: 13201
Proposed branch: lp:~stub/launchpad/sanitize-db
Merge into: lp:launchpad
Diff against target: 715 lines (+0/-701)
3 files modified
lib/lp/scripts/utilities/sanitizedb.py (+0/-597)
lib/lp/scripts/utilities/tests/test_sanitizedb.py (+0/-89)
utilities/sanitize-db.py (+0/-15)
To merge this branch: bzr merge lp:~stub/launchpad/sanitize-db
Reviewer Review Type Date Requested Status
Stuart Bishop (community) Approve
Review via email: mp+64166@code.launchpad.net

Commit message

[r=stub][no-qa] Remove sanitizedb

Description of the change

= Summary =

sanitizedb is hard to maintain, doesn't meet any genuine use cases and takes days and huge resources to not meet them.

== Proposed fix ==

Nuke it from orbit.

== Pre-implementation notes ==

== Implementation details ==

== Tests ==

== Demo and Q/A ==

(No files to lint)

To post a comment you must log in.
Revision history for this message
Stuart Bishop (stub) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== removed file 'lib/lp/scripts/utilities/sanitizedb.py'
2--- lib/lp/scripts/utilities/sanitizedb.py 2011-05-27 19:53:20 +0000
3+++ lib/lp/scripts/utilities/sanitizedb.py 1970-01-01 00:00:00 +0000
4@@ -1,597 +0,0 @@
5-# Copyright 2009 Canonical Ltd. This software is licensed under the
6-# GNU Affero General Public License version 3 (see the file LICENSE).
7-
8-"""Scrub a Launchpad database of private data."""
9-
10-import _pythonpath
11-
12-
13-__metaclass__ = type
14-__all__ = []
15-
16-import re
17-import subprocess
18-import sys
19-
20-from storm.expr import (
21- Join,
22- Or,
23- )
24-import transaction
25-from zope.component import getUtility
26-
27-from canonical.database.constants import UTC_NOW
28-from canonical.database.sqlbase import cursor, sqlvalues
29-from canonical.database.postgresql import ConnectionString, listReferences
30-from canonical.launchpad.scripts.logger import DEBUG2, DEBUG3
31-from canonical.launchpad.webapp.interfaces import (
32- IStoreSelector, MAIN_STORE, MASTER_FLAVOR)
33-from canonical.lp import initZopeless
34-from lp.services.scripts.base import LaunchpadScript
35-
36-
37-class SanitizeDb(LaunchpadScript):
38- usage = "%prog [options] pg_connection_string"
39- description = "Destroy private information in a Launchpad database."
40-
41- def add_my_options(self):
42- self.parser.add_option(
43- "-f", "--force", action="store_true", default=False,
44- help="Force running against a possible production database.")
45- self.parser.add_option(
46- "-n", "--dry-run", action="store_true", default=False,
47- help="Don't commit changes.")
48-
49- def _init_db(self, isolation):
50- if len(self.args) == 0:
51- self.parser.error("PostgreSQL connection string required.")
52- elif len(self.args) > 1:
53- self.parser.error("Too many arguments.")
54-
55- self.pg_connection_string = ConnectionString(self.args[0])
56-
57- if ('prod' in str(self.pg_connection_string)
58- and not self.options.force):
59- self.parser.error(
60- "Attempting to sanitize a potential production database '%s'. "
61- "--force required." % self.pg_connection_string.dbname)
62-
63- self.logger.debug("Connect using '%s'." % self.pg_connection_string)
64-
65- self.txn = initZopeless(
66- dbname=self.pg_connection_string.dbname,
67- dbhost=self.pg_connection_string.host,
68- dbuser=self.pg_connection_string.user,
69- isolation=isolation)
70-
71- self.store = getUtility(IStoreSelector).get(MAIN_STORE, MASTER_FLAVOR)
72-
73- def main(self):
74- self.allForeignKeysCascade()
75- triggers_to_disable = [
76- ('bugmessage', 'set_bug_message_count_t'),
77- ('bugmessage', 'set_date_last_message_t'),
78- ]
79- self.disableTriggers(triggers_to_disable)
80-
81- tables_to_empty = [
82- 'accountpassword',
83- 'archiveauthtoken',
84- 'archivesubscriber',
85- 'authtoken',
86- 'buildqueue',
87- 'commercialsubscription',
88- 'entitlement',
89- 'job',
90- 'logintoken',
91- 'mailinglistban',
92- 'mailinglistsubscription',
93- 'oauthaccesstoken',
94- 'oauthconsumer',
95- 'oauthnonce',
96- 'oauthrequesttoken',
97- 'openidassociation',
98- 'openidconsumerassociation',
99- 'openidconsumernonce',
100- 'openidrpsummary',
101- 'openididentifier',
102- 'requestedcds',
103- 'scriptactivity',
104- 'shipitreport',
105- 'shipitsurvey',
106- 'shipitsurveyanswer',
107- 'shipitsurveyquestion',
108- 'shipitsurveyresult',
109- 'shipment',
110- 'shippingrequest',
111- 'shippingrun',
112- 'sprintattendance', # Is this private?
113- 'standardshipitrequest',
114- 'temporaryblobstorage',
115- 'usertouseremail',
116- 'vote',
117- 'votecast',
118- 'webserviceban',
119- ]
120- for table in tables_to_empty:
121- self.removeTableRows(table)
122-
123- self.removePrivatePeople()
124- self.removePrivateTeams()
125- self.removePrivateBugs()
126- self.removePrivateBugMessages()
127- self.removePrivateBranches()
128- self.removePrivateHwSubmissions()
129- self.removePrivateSpecifications()
130- self.removePrivateLocations()
131- self.removePrivateArchives()
132- self.removePrivateAnnouncements()
133- self.removePrivateLibrarianFiles()
134- self.removeInactiveProjects()
135- self.removeInactiveProducts()
136- self.removeInvalidEmailAddresses()
137- self.removePPAArchivePermissions()
138- self.scrambleHiddenEmailAddresses()
139-
140- self.removeDeactivatedPeopleAndAccounts()
141-
142- # Remove unlinked records. These might contain private data.
143- self.removeUnlinkedEmailAddresses()
144- self.removeUnlinkedAccounts()
145- self.removeUnlinked('revision', [
146- ('revisioncache', 'revision'),
147- ('revisionparent', 'revision'),
148- ('revisionproperty', 'revision'),
149- ])
150- self.removeUnlinked('libraryfilealias', [
151- ('libraryfiledownloadcount', 'libraryfilealias')])
152- self.removeUnlinked('libraryfilecontent')
153- self.removeUnlinked('message', [('messagechunk', 'message')])
154- self.removeUnlinked('staticdiff')
155- self.removeUnlinked('previewdiff')
156- self.removeUnlinked('diff')
157-
158- # Scrub data after removing all the records we are going to.
159- # No point scrubbing data that is going to get removed later.
160- columns_to_scrub = [
161- ('account', ['status_comment']),
162- ('distribution', ['reviewer_whiteboard']),
163- ('distributionmirror', ['whiteboard']),
164- ('hwsubmission', ['raw_emailaddress']),
165- ('nameblacklist', ['comment']),
166- ('person', [
167- 'personal_standing_reason',
168- 'mail_resumption_date']),
169- ('product', ['reviewer_whiteboard']),
170- ('project', ['reviewer_whiteboard']),
171- ('revisionauthor', ['email']),
172- ('signedcodeofconduct', ['admincomment']),
173- ]
174- for table, column in columns_to_scrub:
175- self.scrubColumn(table, column)
176-
177- self.enableTriggers(triggers_to_disable)
178- self.repairData()
179-
180- self.resetForeignKeysCascade()
181- if self.options.dry_run:
182- self.logger.info("Dry run - rolling back.")
183- transaction.abort()
184- else:
185- self.logger.info("Committing.")
186- transaction.commit()
187-
188- def removeDeactivatedPeopleAndAccounts(self):
189- """Remove all suspended and deactivated people & their accounts.
190-
191- Launchpad celebrities are ignored.
192- """
193- from canonical.launchpad.database.account import Account
194- from canonical.launchpad.database.emailaddress import EmailAddress
195- from canonical.launchpad.interfaces.account import AccountStatus
196- from lp.app.interfaces.launchpad import ILaunchpadCelebrities
197- from lp.registry.model.person import Person
198- celebrities = getUtility(ILaunchpadCelebrities)
199- # This is a slow operation due to the huge amount of cascading.
200- # We remove one row at a time for better reporting and PostgreSQL
201- # memory use.
202- deactivated_people = self.store.find(
203- Person,
204- Person.account == Account.id,
205- Account.status != AccountStatus.ACTIVE)
206- total_deactivated_count = deactivated_people.count()
207- deactivated_count = 0
208- for person in deactivated_people:
209- # Ignore celebrities
210- if celebrities.isCelebrityPerson(person.name):
211- continue
212- deactivated_count += 1
213- self.logger.debug(
214- "Removing %d of %d deactivated people (%s)",
215- deactivated_count, total_deactivated_count, person.name)
216- # Clean out the EmailAddress and Account for this person
217- # while we are here, making subsequent unbatched steps
218- # faster. These don't cascade due to the lack of a foreign
219- # key constraint between Person and EmailAddress, and the
220- # ON DELETE SET NULL foreign key constraint between
221- # EmailAddress and Account.
222- self.store.find(
223- EmailAddress, EmailAddress.person == person).remove()
224- self.store.find(Account, Account.id == person.accountID).remove()
225- self.store.remove(person)
226- self.store.flush()
227- self.logger.info(
228- "Removed %d suspended or deactivated people + email + accounts",
229- deactivated_count)
230-
231- def removePrivatePeople(self):
232- """Remove all private people."""
233- from lp.registry.interfaces.person import PersonVisibility
234- from lp.registry.model.person import Person
235- count = self.store.find(
236- Person,
237- Person.teamowner == None,
238- Person.visibility != PersonVisibility.PUBLIC).remove()
239- self.store.flush()
240- self.logger.info("Removed %d private people.", count)
241-
242- def removePrivateTeams(self):
243- """Remove all private people."""
244- from lp.registry.interfaces.person import PersonVisibility
245- from lp.registry.model.person import Person
246- count = self.store.find(
247- Person,
248- Person.teamowner != None,
249- Person.visibility != PersonVisibility.PUBLIC).remove()
250- self.store.flush()
251- self.logger.info("Removed %d private teams.", count)
252-
253- def removePrivateBugs(self):
254- """Remove all private bugs."""
255- from lp.bugs.model.bug import Bug
256- count = self.store.find(Bug, Bug.private == True).remove()
257- self.store.flush()
258- self.logger.info("Removed %d private bugs.", count)
259-
260- def removePrivateBugMessages(self):
261- """Remove all hidden bug messages."""
262- from lp.bugs.model.bugmessage import BugMessage
263- from lp.services.messages.model.message import Message
264- message_ids = list(self.store.using(*[
265- BugMessage,
266- Join(Message, BugMessage.messageID == Message.id),
267- ]).find(BugMessage.id, Message.visible == False))
268- self.store.flush()
269- count = self.store.find(
270- BugMessage, BugMessage.id.is_in(message_ids)).remove()
271- self.store.flush()
272- self.logger.info("Removed %d private bug messages.", count)
273-
274- def removePrivateBranches(self):
275- """Remove all private branches."""
276- from lp.code.model.branch import Branch
277- count = self.store.find(Branch, Branch.private == True).remove()
278- self.store.flush()
279- self.logger.info("Removed %d private branches.", count)
280-
281- def removePrivateHwSubmissions(self):
282- """Remove all private hardware submissions."""
283- from lp.hardwaredb.model.hwdb import HWSubmission
284- count = self.store.find(
285- HWSubmission, HWSubmission.private == True).remove()
286- self.store.flush()
287- self.logger.info("Removed %d private hardware submissions.", count)
288-
289- def removePrivateSpecifications(self):
290- """Remove all private specifications."""
291- from lp.blueprints.model.specification import Specification
292- count = self.store.find(
293- Specification, Specification.private == True).remove()
294- self.store.flush()
295- self.logger.info("Removed %d private specifications.", count)
296-
297- def removePrivateLocations(self):
298- """Remove private person locations."""
299- from lp.registry.model.personlocation import PersonLocation
300- count = self.store.find(
301- PersonLocation, PersonLocation.visible == False).remove()
302- self.store.flush()
303- self.logger.info("Removed %d person locations.", count)
304-
305- def removePrivateArchives(self):
306- """Remove private archives.
307-
308- This might over delete, but lets be conservative for now.
309- """
310- from lp.soyuz.model.archive import Archive
311- count = self.store.find(Archive, Archive.private == True).remove()
312- self.store.flush()
313- self.logger.info(
314- "Removed %d private archives.", count)
315-
316- def removePrivateAnnouncements(self):
317- """Remove announcements that have not yet been published."""
318- from lp.registry.model.announcement import Announcement
319- count = self.store.find(
320- Announcement, Or(
321- Announcement.date_announced == None,
322- Announcement.date_announced > UTC_NOW,
323- Announcement.active == False)).remove()
324- self.store.flush()
325- self.logger.info(
326- "Removed %d unpublished announcements.", count)
327-
328- def removePrivateLibrarianFiles(self):
329- """Remove librarian files only available via the restricted librarian.
330- """
331- from canonical.launchpad.database.librarian import LibraryFileAlias
332- count = self.store.find(
333- LibraryFileAlias, LibraryFileAlias.restricted == True).remove()
334- self.store.flush()
335- self.logger.info("Removed %d restricted librarian files.", count)
336-
337- def removeInactiveProjects(self):
338- """Remove inactive projects."""
339- from lp.registry.model.projectgroup import ProjectGroup
340- count = self.store.find(
341- ProjectGroup, ProjectGroup.active == False).remove()
342- self.store.flush()
343- self.logger.info("Removed %d inactive product groups.", count)
344-
345- def removeInactiveProducts(self):
346- """Remove inactive products."""
347- from lp.registry.model.product import Product
348- count = self.store.find(
349- Product, Product.active == False).remove()
350- self.store.flush()
351- self.logger.info("Removed %d inactive products.", count)
352-
353- def removeTableRows(self, table):
354- """Remove all data from a table."""
355- count = self.store.execute("DELETE FROM %s" % table).rowcount
356- self.store.execute("ANALYZE %s" % table)
357- self.logger.info("Removed %d %s rows (all).", count, table)
358-
359- def removeUnlinked(self, table, ignores=()):
360- """Remove all unlinked entries in the table.
361-
362- References from the ignores list are ignored.
363-
364- :param table: table name.
365-
366- :param ignores: list of (table, column) references to ignore.
367- """
368- references = []
369- for result in listReferences(cursor(), table, 'id'):
370- (from_table, from_column, to_table,
371- to_column, update, delete) = result
372- if (to_table == table and to_column == 'id'
373- and (from_table, from_column) not in ignores):
374- references.append(
375- "EXCEPT SELECT %s FROM %s" % (from_column, from_table))
376- query = (
377- "DELETE FROM %s USING (SELECT id FROM %s %s) AS Unreferenced "
378- "WHERE %s.id = Unreferenced.id"
379- % (table, table, ' '.join(references), table))
380- self.logger.log(DEBUG2, query)
381- count = self.store.execute(query).rowcount
382- self.logger.info("Removed %d unlinked %s rows.", count, table)
383-
384- def removeInvalidEmailAddresses(self):
385- """Remove all invalid and old email addresses."""
386- from canonical.launchpad.database.emailaddress import EmailAddress
387- from canonical.launchpad.interfaces.emailaddress import (
388- EmailAddressStatus)
389- count = self.store.find(
390- EmailAddress, Or(
391- EmailAddress.status == EmailAddressStatus.NEW,
392- EmailAddress.status == EmailAddressStatus.OLD,
393- EmailAddress.email.lower().like(
394- u'%@example.com', case_sensitive=True))).remove()
395- self.store.flush()
396- self.logger.info(
397- "Removed %d invalid, unvalidated and old email addresses.", count)
398-
399- def removePPAArchivePermissions(self):
400- """Remove ArchivePermission records for PPAs."""
401- from lp.soyuz.enums import ArchivePurpose
402- count = self.store.execute("""
403- DELETE FROM ArchivePermission
404- USING Archive
405- WHERE ArchivePermission.archive = Archive.id
406- AND Archive.purpose = %s
407- """ % sqlvalues(ArchivePurpose.PPA)).rowcount
408- self.logger.info(
409- "Removed %d ArchivePermission records linked to PPAs.", count)
410-
411- def scrambleHiddenEmailAddresses(self):
412- """Hide email addresses users have requested to not be public.
413-
414- Call after removeInvalidEmailAddresses to avoid any possible
415- name clashes.
416-
417- This replaces the email addresses of all people with
418- hide_email_addresses set with an @example.com email address.
419- """
420- # One day there might be Storm documentation telling me how to
421- # do this via the ORM.
422- count = self.store.execute("""
423- UPDATE EmailAddress
424- SET email='e' || text(EmailAddress.id) || '@example.com'
425- FROM Person
426- WHERE EmailAddress.person = Person.id
427- AND Person.hide_email_addresses IS TRUE
428- """).rowcount
429- self.logger.info(
430- "Replaced %d hidden email addresses with @example.com", count)
431-
432- def removeUnlinkedEmailAddresses(self):
433- """Remove EmailAddresses not linked to a Person.
434-
435- We call this before removeUnlinkedAccounts to avoid the
436- ON DELETE SET NULL overhead from the EmailAddress -> Account
437- foreign key constraint.
438- """
439- from canonical.launchpad.database.emailaddress import EmailAddress
440- count = self.store.find(
441- EmailAddress, EmailAddress.person == None).remove()
442- self.store.flush()
443- self.logger.info(
444- "Removed %d email addresses not linked to people.", count)
445-
446- def removeUnlinkedAccounts(self):
447- """Remove Accounts not linked to a Person."""
448- from canonical.launchpad.database.account import Account
449- from lp.registry.model.person import Person
450- all_accounts = self.store.find(Account)
451- linked_accounts = self.store.find(
452- Account, Account.id == Person.accountID)
453- unlinked_accounts = all_accounts.difference(linked_accounts)
454- total_unlinked_accounts = unlinked_accounts.count()
455- count = 0
456- for account in unlinked_accounts:
457- self.store.remove(account)
458- self.store.flush()
459- count += 1
460- self.logger.debug(
461- "Removed %d of %d unlinked accounts."
462- % (count, total_unlinked_accounts))
463- self.logger.info("Removed %d accounts not linked to a person", count)
464-
465- def scrubColumn(self, table, columns):
466- """Remove production admin related notes."""
467- query = ["UPDATE %s SET" % table]
468- for column in columns:
469- query.append("%s = NULL" % column)
470- query.append(",")
471- query.pop()
472- query.append("WHERE")
473- for column in columns:
474- query.append("%s IS NOT NULL" % column)
475- query.append("OR")
476- query.pop()
477- self.logger.log(DEBUG3, ' '.join(query))
478- count = self.store.execute(' '.join(query)).rowcount
479- self.logger.info(
480- "Scrubbed %d %s.{%s} entries."
481- % (count, table, ','.join(columns)))
482-
483- def allForeignKeysCascade(self):
484- """Set all foreign key constraints to ON DELETE CASCADE.
485-
486- The current state is recorded first so resetForeignKeysCascade
487- can repair the changes.
488-
489- Only tables in the public schema are modified.
490- """
491- # Get the SQL needed to create the foreign key constraints.
492- # pg_dump seems the only sane way of getting this. We could
493- # generate the SQL ourselves using the pg_constraints table,
494- # but that can change between PostgreSQL releases.
495- # Ideally we could use ALTER CONSTRAINT, but that doesn't exist.
496- # Or modify pg_constraints, but that doesn't work.
497- cmd = [
498- 'pg_dump', '--no-privileges', '--no-owner', '--schema-only',
499- '--schema=public']
500- cmd.extend(
501- self.pg_connection_string.asPGCommandLineArgs().split(' '))
502- self.logger.debug("Running %s", ' '.join(cmd))
503- pg_dump = subprocess.Popen(
504- cmd, stdout=subprocess.PIPE, stderr=subprocess.STDOUT,
505- stdin=subprocess.PIPE)
506- (pg_dump_out, pg_dump_err) = pg_dump.communicate()
507- if pg_dump.returncode != 0:
508- self.fail("pg_dump returned %d" % pg_dump.returncode)
509-
510- cascade_sql = []
511- restore_sql = []
512- pattern = r"""
513- (?x) ALTER \s+ TABLE \s+ ONLY \s+ (".*?"|\w+?) \s+
514- ADD \s+ CONSTRAINT \s+ (".*?"|\w+?) \s+ FOREIGN \s+ KEY [^;]+;
515- """
516- for match in re.finditer(pattern, pg_dump_out):
517- table = match.group(1)
518- constraint = match.group(2)
519-
520- sql = match.group(0)
521-
522- # Drop the existing constraint so we can recreate it.
523- drop_sql = 'ALTER TABLE %s DROP CONSTRAINT %s;' % (
524- table, constraint)
525- restore_sql.append(drop_sql)
526- cascade_sql.append(drop_sql)
527-
528- # Store the SQL needed to restore the constraint.
529- restore_sql.append(sql)
530-
531- # Recreate the constraint as ON DELETE CASCADE
532- sql = re.sub(r"""(?xs)^
533- (.*?)
534- (?:ON \s+ DELETE \s+ (?:NO\s+|SET\s+)?\w+)? \s*
535- ((?:NOT\s+)? DEFERRABLE|) \s*
536- (INITIALLY\s+(?:DEFERRED|IMMEDIATE)|) \s*;
537- """, r"\1 ON DELETE CASCADE \2 \3;", sql)
538- cascade_sql.append(sql)
539-
540- # Set all the foreign key constraints to ON DELETE CASCADE, really.
541- self.logger.info(
542- "Setting %d constraints to ON DELETE CASCADE",
543- len(cascade_sql) / 2)
544- for statement in cascade_sql:
545- self.logger.log(DEBUG3, statement)
546- self.store.execute(statement)
547-
548- # Store the recovery SQL.
549- self._reset_foreign_key_sql = restore_sql
550-
551- def resetForeignKeysCascade(self):
552- """Reset the foreign key constraints' ON DELETE mode."""
553- self.logger.info(
554- "Resetting %d foreign key constraints to initial state.",
555- len(self._reset_foreign_key_sql)/2)
556- for statement in self._reset_foreign_key_sql:
557- self.store.execute(statement)
558-
559- def disableTriggers(self, triggers_to_disable):
560- """Disable a set of triggers.
561-
562- :param triggers_to_disable: List of (table_name, trigger_name).
563- """
564- self.logger.debug("Disabling %d triggers." % len(triggers_to_disable))
565- for table_name, trigger_name in triggers_to_disable:
566- self.logger.debug(
567- "Disabling trigger %s.%s." % (table_name, trigger_name))
568- self.store.execute(
569- "ALTER TABLE %s DISABLE TRIGGER %s"
570- % (table_name, trigger_name))
571-
572- def enableTriggers(self, triggers_to_enable):
573- """Renable a set of triggers.
574-
575- :param triggers_to_enable: List of (table_name, trigger_name).
576- """
577- self.logger.debug("Enabling %d triggers." % len(triggers_to_enable))
578- for table_name, trigger_name in triggers_to_enable:
579- self.logger.debug(
580- "Enabling trigger %s.%s." % (table_name, trigger_name))
581- self.store.execute(
582- "ALTER TABLE %s ENABLE TRIGGER %s"
583- % (table_name, trigger_name))
584-
585- def repairData(self):
586- """After scrubbing, repair any data possibly damaged in the process.
587- """
588- # Repair Bug.message_count and Bug.date_last_message.
589- # The triggers where disabled while we where doing the cascading
590- # deletes because they fail (attempting to change a mutating table).
591- # We can repair these caches by forcing the triggers to run for
592- # every row.
593- self.store.execute("""
594- UPDATE Message SET visible=visible
595- FROM BugMessage
596- WHERE BugMessage.message = Message.id
597- """)
598-
599- def _fail(self, error_message):
600- self.logger.fatal(error_message)
601- sys.exit(1)
602
603=== removed file 'lib/lp/scripts/utilities/tests/test_sanitizedb.py'
604--- lib/lp/scripts/utilities/tests/test_sanitizedb.py 2010-08-20 20:31:18 +0000
605+++ lib/lp/scripts/utilities/tests/test_sanitizedb.py 1970-01-01 00:00:00 +0000
606@@ -1,89 +0,0 @@
607-# Copyright 2009 Canonical Ltd. This software is licensed under the
608-# GNU Affero General Public License version 3 (see the file LICENSE).
609-
610-"""Test the sanitize database script."""
611-
612-__metaclass__ = type
613-
614-import os.path
615-import subprocess
616-import unittest
617-
618-from canonical.config import (
619- config,
620- dbconfig,
621- )
622-from canonical.database.sqlbase import (
623- connect,
624- sqlvalues,
625- )
626-from canonical.testing.layers import DatabaseLayer
627-from lp.registry.interfaces.person import PersonVisibility
628-from lp.testing import TestCase
629-
630-
631-class SanitizeDbScriptTestCase(TestCase):
632- layer = DatabaseLayer
633-
634- def setUp(self):
635- super(SanitizeDbScriptTestCase, self).setUp()
636- self.script_path = os.path.join(
637- config.root, 'utilities', 'sanitize-db.py')
638- DatabaseLayer.force_dirty_database()
639-
640- def containsPrivateInformation(self):
641- # Return True if we detect known private information in the
642- # database.
643- con = connect('launchpad')
644- try:
645- cur = con.cursor()
646- cur.execute(
647- "SELECT TRUE FROM Person WHERE visibility <> %s LIMIT 1"
648- % sqlvalues(PersonVisibility.PUBLIC))
649- if cur.fetchone() is None:
650- return False
651- else:
652- return True
653- finally:
654- # Don't leave the connection dangling or it could block
655- # the script.
656- con.close()
657-
658- def runScript(self, *args):
659- cmd = [self.script_path, dbconfig.main_master + ' user=postgres']
660- cmd.extend(args)
661- process = subprocess.Popen(
662- cmd, stdin=subprocess.PIPE,
663- stdout=subprocess.PIPE, stderr=subprocess.PIPE)
664- (out, err) = process.communicate()
665- self.assertEqual(
666- process.returncode, 0,
667- "%s failed: %d\n%s" % (' '.join(cmd), process.returncode, err))
668- return (out, err)
669-
670- def test_script(self):
671- # Run the sanitize-db.py script and confirm it actually
672- # changes things.
673- self.assertTrue(
674- self.containsPrivateInformation(),
675- 'No private information detected.')
676- self.runScript()
677- self.assertFalse(
678- self.containsPrivateInformation(),
679- 'Private information not removed.')
680-
681- def test_script_dryrun(self):
682- # Run the sanitize-db.py script in --dry-run mode and
683- # confirm it doesn't change things.
684- self.assertTrue(
685- self.containsPrivateInformation(),
686- 'No private information detected.')
687- self.runScript('--dry-run')
688- self.assertTrue(
689- self.containsPrivateInformation(),
690- 'Private information removed.')
691-
692-
693-def test_suite():
694- return unittest.TestLoader().loadTestsFromName(__name__)
695-
696
697=== removed file 'utilities/sanitize-db.py'
698--- utilities/sanitize-db.py 2010-04-27 19:48:39 +0000
699+++ utilities/sanitize-db.py 1970-01-01 00:00:00 +0000
700@@ -1,15 +0,0 @@
701-#!/usr/bin/python -S
702-# Copyright 2009 Canonical Ltd. This software is licensed under the
703-# GNU Affero General Public License version 3 (see the file LICENSE).
704-
705-"""Scrub a Launchpad database of private data."""
706-
707-__metaclass__ = type
708-
709-# pylint: disable-msg=W0403
710-import _pythonpath
711-
712-from lp.scripts.utilities.sanitizedb import SanitizeDb
713-
714-if __name__ == '__main__':
715- SanitizeDb().lock_and_run()