Merge lp:~jtv/launchpad/bug-181254 into lp:launchpad

Proposed by Jeroen T. Vermeulen
Status: Merged
Approved by: Jeroen T. Vermeulen
Approved revision: no longer in the source branch.
Merged at revision: 11096
Proposed branch: lp:~jtv/launchpad/bug-181254
Merge into: lp:launchpad
Diff against target: 505 lines (+361/-32)
4 files modified
lib/lp/testing/factory.py (+41/-1)
lib/lp/translations/doc/translationimportqueue.txt (+1/-0)
lib/lp/translations/model/translationimportqueue.py (+75/-23)
lib/lp/translations/tests/test_autoapproval.py (+244/-8)
To merge this branch: bzr merge lp:~jtv/launchpad/bug-181254
Reviewer Review Type Date Requested Status
Brad Crittenden (community) code Approve
Review via email: mp+29056@code.launchpad.net

Commit message

Scalability fix in translations import queue gardener.

Description of the change

= Bug 181254 =

This addresses a scalability problem in the translations import queue gardener. One of the things it does is go over all entries that need review to see which should be automatically blocked. For each entry that needs review (about 20K—30K per run during normal operation but potentially a lot more) it queries the blocked entries for ones representing templates for the same target, and usually finds nothing. This took up a significant chunk of the script's runtime in a test I ran ages ago.

In this branch I make those nested-loop queries unnecessary, fetching all the information at once instead. It's currently only 2640 tuples containing a few ints and a short string.

To test:
{{{
./bin/test -vv -m lp.translations -t autoapproval
}}}

No lint.

For Q/A, we can check that the number of blocked entries in the staging database does not jump. At the moment there are 70662, of which 2640 are templates. We can also verify that blocking still works.

Jeroen

To post a comment you must log in.
Revision history for this message
Brad Crittenden (bac) wrote :

Hi Jeroen,

Other than the lint issues and the out-of-date comment that we discussed on IRC this branch looks good to me. Thanks.

review: Approve (code)

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/lp/testing/factory.py'
2--- lib/lp/testing/factory.py 2010-07-01 15:22:59 +0000
3+++ lib/lp/testing/factory.py 2010-07-02 14:56:31 +0000
4@@ -147,12 +147,16 @@
5 from lp.testing import run_with_login, time_counter, login, logout, temp_dir
6
7 from lp.translations.interfaces.potemplate import IPOTemplateSet
8+from lp.translations.interfaces.translationimportqueue import (
9+ RosettaImportStatus)
10 from lp.translations.interfaces.translationgroup import (
11 ITranslationGroupSet)
12 from lp.translations.interfaces.translationsperson import ITranslationsPerson
13-from lp.translations.interfaces.translator import ITranslatorSet
14 from lp.translations.interfaces.translationtemplatesbuildjob import (
15 ITranslationTemplatesBuildJobSource)
16+from lp.translations.interfaces.translator import ITranslatorSet
17+from lp.translations.model.translationimportqueue import (
18+ TranslationImportQueueEntry)
19
20
21 SPACE = ' '
22@@ -2033,6 +2037,42 @@
23 translation.is_imported = is_imported
24 translation.is_current = True
25
26+ def makeTranslationImportQueueEntry(self, path=None, status=None,
27+ sourcepackagename=None,
28+ distroseries=None,
29+ productseries=None, content=None,
30+ uploader=None, is_published=False):
31+ """Create a `TranslationImportQueueEntry`."""
32+ if path is None:
33+ path = self.getUniqueString() + '.pot'
34+ if status is None:
35+ status = RosettaImportStatus.NEEDS_REVIEW
36+
37+ if (sourcepackagename is None and distroseries is None):
38+ if productseries is None:
39+ productseries = self.makeProductSeries()
40+ else:
41+ if sourcepackagename is None:
42+ sourcepackagename = self.makeSourcePackageName()
43+ if distroseries is None:
44+ distroseries = self.makeDistroSeries()
45+
46+ if uploader is None:
47+ uploader = self.makePerson()
48+
49+ if content is None:
50+ content = self.getUniqueString()
51+
52+ content_reference = getUtility(ILibraryFileAliasSet).create(
53+ name=os.path.basename(path), size=len(content),
54+ file=StringIO(content), contentType='text/plain')
55+
56+ return TranslationImportQueueEntry(
57+ path=path, status=status, sourcepackagename=sourcepackagename,
58+ distroseries=distroseries, productseries=productseries,
59+ importer=uploader, content=content_reference,
60+ is_published=is_published)
61+
62 def makeMailingList(self, team, owner):
63 """Create a mailing list for the team."""
64 team_list = getUtility(IMailingListSet).new(team, owner)
65
66=== modified file 'lib/lp/translations/doc/translationimportqueue.txt'
67--- lib/lp/translations/doc/translationimportqueue.txt 2010-04-21 16:15:36 +0000
68+++ lib/lp/translations/doc/translationimportqueue.txt 2010-07-02 14:56:31 +0000
69@@ -785,6 +785,7 @@
70 We need it blocked for this test.
71
72 >>> entry5.setStatus(RosettaImportStatus.BLOCKED, rosetta_experts)
73+ >>> transaction.commit()
74
75 Let's see how many entries are blocked.
76
77
78=== modified file 'lib/lp/translations/model/translationimportqueue.py'
79--- lib/lp/translations/model/translationimportqueue.py 2010-04-29 20:41:08 +0000
80+++ lib/lp/translations/model/translationimportqueue.py 2010-07-02 14:56:31 +0000
81@@ -34,7 +34,7 @@
82 from canonical.launchpad.helpers import shortlist
83 from canonical.launchpad.interfaces.launchpad import (
84 ILaunchpadCelebrities, IPersonRoles)
85-from canonical.launchpad.interfaces.lpstorm import IMasterStore
86+from canonical.launchpad.interfaces.lpstorm import IMasterStore, ISlaveStore
87 from canonical.launchpad.webapp.interfaces import NotFoundError
88 from lp.registry.interfaces.distribution import IDistribution
89 from lp.registry.interfaces.series import SeriesStatus
90@@ -1204,32 +1204,84 @@
91
92 return there_are_entries_approved
93
94+ def _getSlaveStore(self):
95+ """Return the slave store for the import queue.
96+
97+ Tests can override this to avoid unnecessary synchronization
98+ issues.
99+ """
100+ return ISlaveStore(TranslationImportQueueEntry)
101+
102+ def _getBlockableDirectories(self):
103+ """Describe all directories where uploads are to be blocked.
104+
105+ Returns a set of tuples, each containing:
106+ * `DistroSeries` id
107+ * `SourcePackageName` id
108+ * `ProductSeries` id
109+ * Directory path.
110+
111+ A `TranslationImportQueueEntry` should be blocked if the tuple
112+ of its distroseries.id, sourcepackagename.id, productseries.id,
113+ and the directory component of its path is found in the result
114+ set.
115+
116+ See `_isBlockable`, which matches a queue entry against the set
117+ returned by this method.
118+ """
119+ importer = getUtility(ITranslationImporter)
120+ template_patterns = "(%s)" % ' OR '.join([
121+ "path LIKE ('%%' || %s)" % quote_like(suffix)
122+ for suffix in importer.template_suffixes
123+ ])
124+
125+ store = self._getSlaveStore()
126+ result = store.execute("""
127+ SELECT
128+ distroseries,
129+ sourcepackagename,
130+ productseries,
131+ regexp_replace(
132+ regexp_replace(path, '^[^/]*$', ''),
133+ '/[^/]*$',
134+ '') AS directory
135+ FROM TranslationImportQueueEntry
136+ WHERE %(is_template)s
137+ GROUP BY distroseries, sourcepackagename, productseries, directory
138+ HAVING bool_and(status = %(blocked)s)
139+ ORDER BY distroseries, sourcepackagename, productseries, directory
140+ """ % {
141+ 'blocked': quote(RosettaImportStatus.BLOCKED),
142+ 'is_template': template_patterns,
143+ })
144+
145+ return set(result)
146+
147+ def _isBlockable(self, entry, blocklist):
148+ """Is `entry` one that should be blocked according to `blocklist`?
149+
150+ :param entry: A `TranslationImportQueueEntry` that may be a
151+ candidate for blocking.
152+ :param blocklist: A description of blockable directories as
153+ returned by `_getBlockableDirectories`.
154+ """
155+ description = (
156+ entry.distroseries_id,
157+ entry.sourcepackagename_id,
158+ entry.productseries_id,
159+ os.path.dirname(entry.path),
160+ )
161+ return description in blocklist
162+
163 def executeOptimisticBlock(self, txn=None):
164 """See ITranslationImportQueue."""
165- importer = getUtility(ITranslationImporter)
166+ # Find entries where all template entries for the same
167+ # translation target that are in the same directory are in the
168+ # Blocked state. Set those entries to Blocked as well.
169+ blocklist = self._getBlockableDirectories()
170 num_blocked = 0
171 for entry in self._iterNeedsReview():
172- if importer.isTemplateName(entry.path):
173- # Templates cannot be managed automatically. Ignore them and
174- # wait for an admin to do it.
175- continue
176- # As kiko would say... this method is crack, I know it, but we
177- # need it to save time to our poor Rosetta Experts while handling
178- # the translation import queue...
179- # We need to look for all templates that we have on the same
180- # directory for the entry we are processing, and check that all of
181- # them are blocked. If there is at least one that's not blocked,
182- # we cannot block the entry.
183- templates = entry.getTemplatesOnSameDirectory()
184- has_templates = False
185- has_templates_unblocked = False
186- for template in templates:
187- has_templates = True
188- if template.status != RosettaImportStatus.BLOCKED:
189- # This template is not set as blocked, so we note it.
190- has_templates_unblocked = True
191-
192- if has_templates and not has_templates_unblocked:
193+ if self._isBlockable(entry, blocklist):
194 # All templates on the same directory as this entry are
195 # blocked, so we can block it too.
196 entry.setStatus(
197
198=== modified file 'lib/lp/translations/tests/test_autoapproval.py'
199--- lib/lp/translations/tests/test_autoapproval.py 2010-06-07 10:24:03 +0000
200+++ lib/lp/translations/tests/test_autoapproval.py 2010-07-02 14:56:31 +0000
201@@ -25,6 +25,7 @@
202 from lp.registry.model.sourcepackagename import (
203 SourcePackageName,
204 SourcePackageNameSet)
205+from lp.testing.fakemethod import FakeMethod
206 from lp.services.worlddata.model.language import Language, LanguageSet
207 from lp.translations.model.customlanguagecode import CustomLanguageCode
208 from lp.translations.model.pofile import POFile
209@@ -354,8 +355,7 @@
210 self._setUpDistro()
211 other_series = self.factory.makeDistroRelease(
212 distribution=self.distro)
213- other_template = self._makeTemplateForDistroSeries(
214- other_series, 'test1')
215+ self._makeTemplateForDistroSeries(other_series, 'test1')
216 self.distrotemplate1.iscurrent = False
217 self.distrotemplate2.iscurrent = True
218 self.distrotemplate1.from_sourcepackagename = None
219@@ -514,7 +514,7 @@
220
221 # The clashing entry goes through approval unsuccessfully, but
222 # without causing breakage.
223- entry2 = queue.addOrUpdateEntry(
224+ queue.addOrUpdateEntry(
225 'program/nl.po', 'other contents', False, template.owner,
226 productseries=template.productseries, potemplate=template)
227
228@@ -712,8 +712,7 @@
229 template = self.factory.makePOTemplate(
230 productseries=trunk, translation_domain='domain')
231 template.iscurrent = True
232- credits = self.factory.makePOTMsgSet(template, "translator-credits",
233- sequence=1)
234+ self.factory.makePOTMsgSet(template, "translator-credits", sequence=1)
235
236 entry = self.queue.addOrUpdateEntry(
237 'nl.po', '# ...', False, template.owner, productseries=trunk)
238@@ -825,7 +824,6 @@
239 self._exists(entry_id),
240 "Queue entry in state '%s' was not removed." % status)
241
242-
243 def test_cleanUpInactiveProductEntries(self):
244 # After a product is deactivated, _cleanUpInactiveProductEntries
245 # will clean up any entries it may have on the queue.
246@@ -886,7 +884,7 @@
247 # gardener has permissions to do this. The POFile's owner is
248 # the rosetta_experts team.
249 trunk = self.product.getSeries('trunk')
250- template = self._makeTemplate(trunk)
251+ self._makeTemplate(trunk)
252 entry = self._makeQueueEntry(trunk)
253 rosetta_experts = getUtility(ILaunchpadCelebrities).rosetta_experts
254
255@@ -910,11 +908,249 @@
256
257 self.becomeTheGardener()
258
259- pofile = entry.getGuessedPOFile()
260+ entry.getGuessedPOFile()
261
262 credits.getCurrentTranslationMessage(template, self.language)
263 self.assertNotEqual(None, credits)
264
265
266+class TestAutoBlocking(TestCaseWithFactory):
267+
268+ layer = LaunchpadZopelessLayer
269+
270+ def setUp(self):
271+ super(TestAutoBlocking, self).setUp()
272+ self.queue = TranslationImportQueue()
273+ # Our test queue operates on the master store instead of the
274+ # slave store so we don't have to synchronize stores.
275+ master_store = IMasterStore(TranslationImportQueueEntry)
276+ self.queue._getSlaveStore = FakeMethod(result=master_store)
277+
278+ def _copyTargetFromEntry(self, entry):
279+ """Return a dict representing `entry`'s translation target.
280+
281+ :param entry: An existing `TranslationImportQueueEntry`, or None.
282+ """
283+ if entry is None:
284+ return {}
285+ else:
286+ return {
287+ 'distroseries': entry.distroseries,
288+ 'sourcepackagename': entry.sourcepackagename,
289+ 'productseries': entry.productseries,
290+ }
291+
292+ def _makeTemplateEntry(self, suffix='.pot', directory=None, status=None,
293+ same_target_as=None):
294+ """Create an import queue entry for a template.
295+
296+ If `same_target_as` is given, creates an entry for the same
297+ translation target as `same_target_as`. This lets you create an
298+ entry for the same translation target as another one.
299+ """
300+ if suffix == '.xpi':
301+ basename = 'en-US'
302+ else:
303+ basename = self.factory.getUniqueString()
304+
305+ filename = basename + suffix
306+ if directory is None:
307+ path = filename
308+ else:
309+ path = '/'.join([directory, filename])
310+
311+ target = self._copyTargetFromEntry(same_target_as)
312+
313+ return self.factory.makeTranslationImportQueueEntry(
314+ path=path, status=status, **target)
315+
316+ def _makeTranslationEntry(self, path, status=None, same_target_as=None):
317+ """Create an import queue entry for a translation file.
318+
319+ If `same_target_as` is given, creates an entry for the same
320+ translation target as `same_target_as`. This lets you create an
321+ entry that may have to be blocked depending on same_target_as.
322+ """
323+ target = self._copyTargetFromEntry(same_target_as)
324+ return self.factory.makeTranslationImportQueueEntry(
325+ path=path, status=status, **target)
326+
327+ def test_getBlockableDirectories_checks_templates(self):
328+ old_blocklist = self.queue._getBlockableDirectories()
329+
330+ self._makeTemplateEntry(status=RosettaImportStatus.BLOCKED)
331+
332+ new_blocklist = self.queue._getBlockableDirectories()
333+
334+ self.assertEqual(len(old_blocklist) + 1, len(new_blocklist))
335+
336+ def test_getBlockableDirectories_ignores_translations(self):
337+ old_blocklist = self.queue._getBlockableDirectories()
338+
339+ self._makeTranslationEntry(
340+ 'gl.po', status=RosettaImportStatus.BLOCKED)
341+
342+ new_blocklist = self.queue._getBlockableDirectories()
343+
344+ self.assertEqual(len(old_blocklist), len(new_blocklist))
345+
346+ def test_getBlockableDirectories_checks_xpi_templates(self):
347+ old_blocklist = self.queue._getBlockableDirectories()
348+
349+ self._makeTemplateEntry(
350+ suffix='.xpi', status=RosettaImportStatus.BLOCKED)
351+
352+ new_blocklist = self.queue._getBlockableDirectories()
353+
354+ self.assertEqual(len(old_blocklist) + 1, len(new_blocklist))
355+
356+ def test_getBlockableDirectories_ignores_xpi_translations(self):
357+ old_blocklist = self.queue._getBlockableDirectories()
358+
359+ self._makeTranslationEntry(
360+ 'lt.xpi', status=RosettaImportStatus.BLOCKED)
361+
362+ new_blocklist = self.queue._getBlockableDirectories()
363+
364+ self.assertEqual(len(old_blocklist), len(new_blocklist))
365+
366+ def test_isBlockable_none(self):
367+ blocklist = self.queue._getBlockableDirectories()
368+ entry = self._makeTranslationEntry('nl.po')
369+ self.assertFalse(self.queue._isBlockable(entry, blocklist))
370+
371+ def test_isBlockable_one_blocked(self):
372+ blocked_template = self._makeTemplateEntry(
373+ status=RosettaImportStatus.BLOCKED)
374+ blocklist = self.queue._getBlockableDirectories()
375+
376+ translations = self._makeTranslationEntry(
377+ 'de.po', same_target_as=blocked_template)
378+
379+ self.assertTrue(self.queue._isBlockable(translations, blocklist))
380+
381+ def test_isBlockable_multiple_blocked(self):
382+ blocked1 = self._makeTemplateEntry(status=RosettaImportStatus.BLOCKED)
383+ self._makeTemplateEntry(
384+ status=RosettaImportStatus.BLOCKED, same_target_as=blocked1)
385+ blocklist = self.queue._getBlockableDirectories()
386+
387+ translations = self._makeTranslationEntry(
388+ 'lo.po', same_target_as=blocked1)
389+
390+ self.assertTrue(self.queue._isBlockable(translations, blocklist))
391+
392+ def test_isBlockable_one_unblocked(self):
393+ unblocked = self._makeTemplateEntry()
394+ blocklist = self.queue._getBlockableDirectories()
395+
396+ translations = self._makeTranslationEntry(
397+ 'xh.po', same_target_as=unblocked)
398+
399+ self.assertFalse(self.queue._isBlockable(translations, blocklist))
400+
401+ def test_isBlockable_mixed(self):
402+ # When there are both blocked and unblocked template entries in
403+ # a directory, translation uploads for that directory are not
404+ # blocked.
405+ blocked = self._makeTemplateEntry(status=RosettaImportStatus.BLOCKED)
406+ self._makeTemplateEntry(same_target_as=blocked)
407+ blocklist = self.queue._getBlockableDirectories()
408+
409+ translations = self._makeTranslationEntry(
410+ 'fr.po', same_target_as=blocked)
411+
412+ self.assertFalse(self.queue._isBlockable(translations, blocklist))
413+
414+ def test_getBlockableDirectories_path_rootdir_match(self):
415+ # _getBlockableDirectories matches sees a template and
416+ # translations file in the root directory as being in the same
417+ # directory.
418+ blocked = self._makeTemplateEntry(
419+ directory=None, status=RosettaImportStatus.BLOCKED)
420+ blocklist = self.queue._getBlockableDirectories()
421+ translations = self._makeTranslationEntry(
422+ 'es.po', same_target_as=blocked)
423+ self.assertTrue(self.queue._isBlockable(translations, blocklist))
424+
425+ def test_getBlockableDirectories_path_rootdir_nonmatch(self):
426+ # _getBlockableDirectories matches sees a template in the root
427+ # directory (i.e. without a directory component in its path) as
428+ # being in a different directory from a translations upload in a
429+ # subdirectory.
430+ blocked = self._makeTemplateEntry(
431+ directory=None, status=RosettaImportStatus.BLOCKED)
432+ blocklist = self.queue._getBlockableDirectories()
433+ translations = self._makeTranslationEntry(
434+ 'po/es.po', same_target_as=blocked)
435+ self.assertFalse(self.queue._isBlockable(translations, blocklist))
436+
437+ def test_getBlockableDirectories_path_subdir_match(self):
438+ # _getBlockableDirectories matches sees a template and
439+ # translations file in the same directory as being in the same
440+ # directory.
441+ blocked = self._makeTemplateEntry(
442+ directory='po/module', status=RosettaImportStatus.BLOCKED)
443+ blocklist = self.queue._getBlockableDirectories()
444+ translations = self._makeTranslationEntry(
445+ 'po/module/es.po', same_target_as=blocked)
446+ self.assertTrue(self.queue._isBlockable(translations, blocklist))
447+
448+ def test_getBlockableDirectories_path_subdir_nonmatch(self):
449+ # _getBlockableDirectories matches sees a template in a
450+ # subdirectory as being in a different directory from a
451+ # translations upload in the root directory.
452+ blocked = self._makeTemplateEntry(
453+ directory='po', status=RosettaImportStatus.BLOCKED)
454+ blocklist = self.queue._getBlockableDirectories()
455+ translations = self._makeTranslationEntry(
456+ 'es.po', same_target_as=blocked)
457+ self.assertFalse(self.queue._isBlockable(translations, blocklist))
458+
459+ def test_getBlockableDirectories_path_nested_translations(self):
460+ # _getBlockableDirectories sees a translations upload in a
461+ # subdirectory of that on the template upload as being in a
462+ # different directory.
463+ blocked = self._makeTemplateEntry(
464+ directory='po', status=RosettaImportStatus.BLOCKED)
465+ blocklist = self.queue._getBlockableDirectories()
466+ translations = self._makeTranslationEntry(
467+ 'po/module/es.po', same_target_as=blocked)
468+ self.assertFalse(self.queue._isBlockable(translations, blocklist))
469+
470+ def test_getBlockableDirectories_path_nested_template(self):
471+ # _getBlockableDirectories sees a translations upload in one
472+ # directory and a template upload in a subdirectory of that
473+ # directory as being in different directories.
474+ blocked = self._makeTemplateEntry(
475+ directory='po/module', status=RosettaImportStatus.BLOCKED)
476+ blocklist = self.queue._getBlockableDirectories()
477+ translations = self._makeTranslationEntry(
478+ 'po/es.po', same_target_as=blocked)
479+ self.assertFalse(self.queue._isBlockable(translations, blocklist))
480+
481+ def test_getBlockableDirectories_path_substring_translations(self):
482+ # _getBlockableDirectories sees the difference between a
483+ # template's directory and a translation upload's directory even
484+ # if the latter is a substring of the former.
485+ blocked = self._makeTemplateEntry(
486+ directory='po/moduleX', status=RosettaImportStatus.BLOCKED)
487+ blocklist = self.queue._getBlockableDirectories()
488+ translations = self._makeTranslationEntry(
489+ 'po/module/es.po', same_target_as=blocked)
490+ self.assertFalse(self.queue._isBlockable(translations, blocklist))
491+
492+ def test_getBlockableDirectories_path_substring_template(self):
493+ # _getBlockableDirectories sees the difference between a
494+ # template's directory and a translation upload's directory even
495+ # if the former is a substring of the latter.
496+ blocked = self._makeTemplateEntry(
497+ directory='po/module', status=RosettaImportStatus.BLOCKED)
498+ blocklist = self.queue._getBlockableDirectories()
499+ translations = self._makeTranslationEntry(
500+ 'po/moduleX/es.po', same_target_as=blocked)
501+ self.assertFalse(self.queue._isBlockable(translations, blocklist))
502+
503+
504 def test_suite():
505 return TestLoader().loadTestsFromName(__name__)