Merge lp:~wgrant/launchpad/tm-performance-4-queries into lp:launchpad

Proposed by William Grant on 2015-02-28
Status: Work in progress
Proposed branch: lp:~wgrant/launchpad/tm-performance-4-queries
Merge into: lp:launchpad
Prerequisite: lp:~wgrant/launchpad/tm-performance-2-model
Diff against target: 496 lines (+105/-210)
3 files modified
lib/lp/translations/model/pofile.py (+74/-172)
lib/lp/translations/model/potmsgset.py (+28/-36)
lib/lp/translations/tests/test_suggestions.py (+3/-2)
To merge this branch: bzr merge lp:~wgrant/launchpad/tm-performance-4-queries
Reviewer Review Type Date Requested Status
Launchpad code reviewers 2015-02-28 Pending
Review via email: mp+251345@code.launchpad.net
To post a comment you must log in.
17375. By William Grant on 2015-02-28

Drop unused functions.

17376. By William Grant on 2015-02-28

Merge bug-736005-trivialise.

17377. By William Grant on 2015-02-28

Text searches now use TTI.msgid_*.

17378. By William Grant on 2015-02-28

POFile POTMsgSet filters now use TTI denormed columns.

Unmerged revisions

17378. By William Grant on 2015-02-28

POFile POTMsgSet filters now use TTI denormed columns.

17377. By William Grant on 2015-02-28

Text searches now use TTI.msgid_*.

17376. By William Grant on 2015-02-28

Merge bug-736005-trivialise.

17375. By William Grant on 2015-02-28

Drop unused functions.

17374. By William Grant on 2015-02-28

Merge tm-performance-2-model.

17373. By William Grant on 2015-02-27

Translation suggestion queries now use the denormed columns.

17372. By William Grant on 2015-02-27

Fix POTMsgSet.suggestive clone and tests.

17371. By William Grant on 2015-02-27

Set POTemplate.suggestive based on normal suggestiveness criteria, and add POTemplateSet.filterSuggestiveTargets with the same.

17370. By William Grant on 2015-02-27

Propagate suggestive down from POTemplate to POTMsgSet to TranslationMessage.

17369. By William Grant on 2015-02-27

submitSuggestion now uses _makeTranslationMessage.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/lp/translations/model/pofile.py'
2--- lib/lp/translations/model/pofile.py 2015-02-27 22:58:59 +0000
3+++ lib/lp/translations/model/pofile.py 2015-02-28 01:27:16 +0000
4@@ -13,6 +13,7 @@
5 ]
6
7 import datetime
8+from operator import itemgetter
9
10 import pytz
11 from sqlobject import (
12@@ -50,6 +51,7 @@
13 from lp.registry.interfaces.person import validate_public_person
14 from lp.services.database.constants import UTC_NOW
15 from lp.services.database.datetimecol import UtcDateTimeCol
16+from lp.services.database.decoratedresultset import DecoratedResultSet
17 from lp.services.database.interfaces import (
18 IMasterStore,
19 IStore,
20@@ -57,7 +59,6 @@
21 from lp.services.database.sqlbase import (
22 flush_database_updates,
23 quote,
24- quote_like,
25 SQLBase,
26 sqlvalues,
27 )
28@@ -97,11 +98,9 @@
29 credits_message_str,
30 POTMsgSet,
31 )
32+from lp.translations.model.potranslation import POTranslation
33 from lp.translations.model.translationimportqueue import collect_import_info
34-from lp.translations.model.translationmessage import (
35- make_plurals_sql_fragment,
36- TranslationMessage,
37- )
38+from lp.translations.model.translationmessage import TranslationMessage
39 from lp.translations.model.translationtemplateitem import (
40 TranslationTemplateItem,
41 )
42@@ -112,17 +111,6 @@
43 )
44
45
46-def compose_sql_translationmessage_has_translations(tm_sql_identifier):
47- """Compose SQL for "`TranslationMessage` is nonempty.".
48-
49- :param tm_sql_identifier: The SQL identifier for the
50- `TranslationMessage` in the query that's to be tested.
51- """
52- return "COALESCE(%s) IS NOT NULL" % ", ".join([
53- "%s.msgstr%d" % (tm_sql_identifier, form)
54- for form in xrange(TranslationConstants.MAX_PLURAL_FORMS)])
55-
56-
57 class POFileMixIn(RosettaStats):
58 """Base class for `POFile` and `DummyPOFile`.
59
60@@ -166,102 +154,6 @@
61 header.has_plural_forms = self.potemplate.hasPluralMessage()
62 return header
63
64- def _getTranslationSearchQuery(self, pofile, plural_form, text):
65- """Query to find `text` in `plural_form` translations of a `pofile`.
66-
67- This produces a list of clauses that can be used to search for
68- TranslationMessages containing `text` in their msgstr[`plural_form`].
69- Returned values are POTMsgSet ids containing them, expected to be
70- used in a UNION across all plural forms.
71- """
72- translation_match = """
73- -- Find translations containing `text`.
74- -- Like in findPOTMsgSetsContaining(), to avoid seqscans on
75- -- POTranslation table, we do ILIKE comparison on them in
76- -- a subselect which is first filtered by the POFile.
77- SELECT TranslationMessage.potmsgset
78- FROM TranslationMessage
79- JOIN TranslationTemplateItem
80- ON TranslationMessage.potmsgset
81- = TranslationTemplateItem.potmsgset
82- WHERE
83- TranslationTemplateItem.potemplate = %(potemplate)s AND
84- TranslationMessage.language = %(language)s AND
85- TranslationMessage.msgstr%(plural_form)d IN (
86- SELECT POTranslation.id FROM POTranslation WHERE
87- POTranslation.id IN (
88- SELECT DISTINCT(msgstr%(plural_form)d)
89- FROM TranslationMessage AS tm_ids
90- JOIN TranslationTemplateItem
91- ON tm_ids.potmsgset=TranslationTemplateItem.potmsgset
92- WHERE
93- TranslationTemplateItem.potemplate
94- = %(potemplate)s AND
95- TranslationTemplateItem.sequence > 0 AND
96- tm_ids.language=%(language)s
97- ) AND
98- POTranslation.translation
99- ILIKE '%%' || %(text)s || '%%')
100- """ % dict(potemplate=quote(pofile.potemplate),
101- language=quote(pofile.language),
102- plural_form=plural_form,
103- text=quote_like(text))
104- return translation_match
105-
106- def _getTemplateSearchQuery(self, text):
107- """Query for finding `text` in msgids of this POFile.
108- """
109- english_match = """
110- -- Step 1a: get POTMsgSets where msgid_singular contains `text`
111- -- To avoid seqscans on POMsgID table (what LIKE usually
112- -- does), we do ILIKE comparison on them in a subselect first
113- -- filtered by this POTemplate.
114- SELECT POTMsgSet.id
115- FROM POTMsgSet
116- JOIN TranslationTemplateItem
117- ON TranslationTemplateItem.potmsgset=POTMsgSet.id AND
118- TranslationTemplateItem.potemplate=%s
119- WHERE
120- (POTMsgSet.msgid_singular IS NOT NULL AND
121- POTMsgSet.msgid_singular IN (
122- SELECT POMsgID.id FROM POMsgID
123- WHERE id IN (
124- SELECT DISTINCT(POTMsgSet.msgid_singular)
125- FROM POTMsgSet
126- JOIN TranslationTemplateItem
127- ON TranslationTemplateItem.potmsgset = POTMsgSet.id
128- WHERE
129- TranslationTemplateItem.potemplate=%s AND
130- TranslationTemplateItem.sequence > 0
131- ) AND
132- msgid ILIKE '%%' || %s || '%%'))
133- UNION
134- -- Step 1b: like above, just on msgid_plural.
135- SELECT POTMsgSet.id
136- FROM POTMsgSet
137- JOIN TranslationTemplateItem
138- ON TranslationTemplateItem.potmsgset=POTMsgSet.id AND
139- TranslationTemplateItem.potemplate=%s
140- WHERE
141- (POTMsgSet.msgid_plural IS NOT NULL AND
142- POTMsgSet.msgid_plural IN (
143- SELECT POMsgID.id FROM POMsgID
144- WHERE id IN (
145- SELECT DISTINCT(POTMsgSet.msgid_plural)
146- FROM POTMsgSet
147- JOIN TranslationTemplateItem
148- ON TranslationTemplateItem.potmsgset = POTMsgSet.id
149- WHERE
150- TranslationTemplateItem.potemplate=%s AND
151- TranslationTemplateItem.sequence > 0
152- ) AND
153- msgid ILIKE '%%' || %s || '%%'))
154- """ % (quote(self.potemplate), quote(self.potemplate),
155- quote_like(text),
156- quote(self.potemplate), quote(self.potemplate),
157- quote_like(text))
158- return english_match
159-
160 def _getOrderedPOTMsgSets(self, origin_tables, query):
161 """Find all POTMsgSets matching `query` from `origin_tables`.
162
163@@ -274,41 +166,79 @@
164 POTMsgSet, query)
165 return results.order_by(TranslationTemplateItem.sequence)
166
167+ def _getTranslationSearchBits(self, pofile):
168+ ThisTM = ClassAlias(TranslationMessage)
169+ ThisPT = ClassAlias(POTranslation)
170+ str_id_cols = [
171+ getattr(ThisTM, 'msgstr%dID' % plural_form)
172+ for plural_form in range(pofile.plural_forms)]
173+ origin = [
174+ LeftJoin(
175+ ThisTM,
176+ And(
177+ ThisTM.potmsgsetID ==
178+ TranslationTemplateItem.potmsgsetID,
179+ ThisTM.languageID == pofile.languageID)),
180+ LeftJoin(
181+ ThisPT,
182+ ThisPT.id.is_in(str_id_cols)),
183+ ]
184+ search_options = [ThisPT.translation]
185+ return origin, search_options
186+
187 def findPOTMsgSetsContaining(self, text):
188 """See `IPOFile`."""
189+ origin = [
190+ POTMsgSet,
191+ Join(
192+ TranslationTemplateItem,
193+ TranslationTemplateItem.potmsgsetID == POTMsgSet.id),
194+ ]
195 clauses = [
196- 'TranslationTemplateItem.potemplate = %s' % sqlvalues(
197- self.potemplate),
198- 'TranslationTemplateItem.potmsgset = POTMsgSet.id',
199- 'TranslationTemplateItem.sequence > 0',
200+ TranslationTemplateItem.potemplateID == self.potemplate.id,
201+ TranslationTemplateItem.sequence > 0,
202 ]
203
204 if text is not None:
205 assert len(text) > 1, (
206 "You can not search for strings shorter than 2 characters.")
207
208+ # A list of string columns, of which at least one must match
209+ # the search term.
210+ search_options = []
211+
212+ # Search the English strings.
213 if self.potemplate.uses_english_msgids:
214- english_match = self._getTemplateSearchQuery(text)
215+ origin.append(Join(
216+ POMsgID,
217+ POMsgID.id.is_in((
218+ TranslationTemplateItem.msgid_singularID,
219+ TranslationTemplateItem.msgid_pluralID))))
220+ search_options.extend([POMsgID.msgid])
221 else:
222- # If msgids are not in English, use English PO file
223- # to fetch original strings instead.
224- en_pofile = self.potemplate.getPOFileByLang('en')
225- english_match = self._getTranslationSearchQuery(
226- en_pofile, 0, text)
227+ en_po = self.potemplate.getPOFileByLang('en')
228+ en_origin, en_options = self._getTranslationSearchBits(en_po)
229+ origin.extend(en_origin)
230+ search_options.extend(en_options)
231
232- # Do not look for translations in a DummyPOFile.
233- search_clauses = [english_match]
234+ # Search the translations themselves, unless this is a
235+ # DummyPOFile.
236 if self.id is not None:
237- for plural_form in range(self.plural_forms):
238- translation_match = self._getTranslationSearchQuery(
239- self, plural_form, text)
240- search_clauses.append(translation_match)
241-
242- clauses.append(
243- "POTMsgSet.id IN (" + " UNION ".join(search_clauses) + ")")
244-
245- return self._getOrderedPOTMsgSets(
246- [POTMsgSet, TranslationTemplateItem], ' AND '.join(clauses))
247+ po_origin, po_options = self._getTranslationSearchBits(self)
248+ origin.extend(po_origin)
249+ search_options.extend(po_options)
250+
251+ # Case-insensitively substring-match the search term against
252+ # each searchable column.
253+ clauses.append(Or(*(
254+ str.like('%%%s%%' % text, case_sensitive=False)
255+ for str in search_options)))
256+
257+ result = IMasterStore(POTMsgSet).using(*origin).find(
258+ (POTMsgSet, TranslationTemplateItem),
259+ *clauses).config(distinct=True).order_by(
260+ TranslationTemplateItem.sequence)
261+ return DecoratedResultSet(result, itemgetter(0))
262
263 def getFullLanguageCode(self):
264 """See `IPOFile`."""
265@@ -529,25 +459,6 @@
266 "Calling prepareTranslationCredits on a message with "
267 "unknown credits type '%s'." % credits_type.title)
268
269- def _getClausesForPOFileMessages(self, current=True):
270- """Get TranslationMessages for the POFile via TranslationTemplateItem.
271-
272- Call-site will have to have appropriate clauseTables.
273- """
274- # When all the code that uses this method is moved to Storm,
275- # we can replace it with _getStormClausesForPOFileMessages
276- # and then remove it.
277- clauses = [
278- 'TranslationTemplateItem.potemplate = %s' % sqlvalues(
279- self.potemplate),
280- ('TranslationTemplateItem.potmsgset'
281- ' = TranslationMessage.potmsgset'),
282- 'TranslationMessage.language = %s' % sqlvalues(self.language)]
283- if current:
284- clauses.append('TranslationTemplateItem.sequence > 0')
285-
286- return clauses
287-
288 def _getStormClausesForPOFileMessages(self, current=True):
289 """Get TranslationMessages for the POFile via TranslationTemplateItem.
290 """
291@@ -627,20 +538,10 @@
292 """See `IPOFile`."""
293 # We get all POTMsgSet.ids with translations, and later
294 # exclude them using a NOT IN subselect.
295- translated_clauses, clause_tables = self._getTranslatedMessagesQuery()
296+ trans_clauses, trans_tables = self._getTranslatedMessagesQuery()
297 translated_query = Select(
298- POTMsgSet.id,
299- tables=[TranslationTemplateItem, TranslationMessage, POTMsgSet],
300- where=And(
301- # Even though this seems silly, Postgres prefers
302- # TranslationTemplateItem index if we add it (and on
303- # staging we get more than a 10x speed improvement: from
304- # 8s to 0.7s). We also need to put it before any other
305- # clauses to be actually useful.
306- TranslationTemplateItem.potmsgsetID ==
307- TranslationTemplateItem.potmsgsetID,
308- POTMsgSet.id == TranslationTemplateItem.potmsgsetID,
309- *translated_clauses))
310+ TranslationTemplateItem.potmsgsetID,
311+ tables=trans_tables, where=And(*trans_clauses))
312 clauses = [
313 TranslationTemplateItem.potemplateID == self.potemplate.id,
314 TranslationTemplateItem.potmsgsetID == POTMsgSet.id,
315@@ -669,7 +570,7 @@
316 Coalesce(Diverged.date_reviewed, Diverged.date_created),
317 tables=[Diverged],
318 where=And(
319- Diverged.potmsgsetID == POTMsgSet.id,
320+ Diverged.potmsgsetID == TranslationTemplateItem.potmsgsetID,
321 Diverged.languageID == self.language.id,
322 getattr(Diverged, flag_name),
323 Diverged.potemplateID == self.potemplate.id))
324@@ -679,7 +580,7 @@
325 Coalesce(Shared.date_reviewed, Shared.date_created),
326 tables=[Shared],
327 where=And(
328- Shared.potmsgsetID == POTMsgSet.id,
329+ Shared.potmsgsetID == TranslationTemplateItem.potmsgsetID,
330 Shared.languageID == self.language.id,
331 getattr(Shared, flag_name),
332 Shared.potemplateID == None))
333@@ -694,7 +595,7 @@
334 # A POT set has "new" suggestions if there is a non current
335 # TranslationMessage newer than the current reviewed one.
336 query = And(
337- POTMsgSet.id.is_in(
338+ TranslationTemplateItem.potmsgsetID.is_in(
339 Select(
340 TranslationMessage.potmsgsetID,
341 tables=[
342@@ -736,7 +637,7 @@
343 Diverged.potemplateID == self.potemplate.id))))
344 imported_clauses = [
345 Imported.id != TranslationMessage.id,
346- Imported.potmsgsetID == POTMsgSet.id,
347+ Imported.potmsgsetID == TranslationTemplateItem.potmsgsetID,
348 Imported.languageID == self.language.id,
349 getattr(Imported, other_side_flag_name),
350 Or(
351@@ -798,7 +699,8 @@
352 'table_name': table_name,
353 } for plural_form in range(1, self.plural_forms))
354 query.append(
355- '(POTMsgSet.msgid_plural IS NULL OR (%s))' % plurals_query)
356+ '(%(table_name)s.msgid_plural IS NULL OR (%(plurals_query)s))'
357+ % {'plurals_query': plurals_query, 'table_name': table_name})
358 return query
359
360 def _countTranslations(self):
361@@ -1531,7 +1433,7 @@
362 clauses = [
363 TranslationTemplateItem.potemplateID == POFile.potemplateID,
364 POTMsgSet.id == TranslationTemplateItem.potmsgsetID,
365- POTMsgSet.msgid_singular == POMsgID.id,
366+ TranslationTemplateItem.msgid_singular == POMsgID.id,
367 POMsgID.msgid.is_in(POTMsgSet.credits_message_ids),
368 ]
369 if untranslated:
370
371=== modified file 'lib/lp/translations/model/potmsgset.py'
372--- lib/lp/translations/model/potmsgset.py 2015-02-28 01:27:16 +0000
373+++ lib/lp/translations/model/potmsgset.py 2015-02-28 01:27:16 +0000
374@@ -21,9 +21,12 @@
375 StringCol,
376 )
377 from storm.expr import (
378+ And,
379 Coalesce,
380 Desc,
381+ Not,
382 Or,
383+ Select,
384 SQL,
385 )
386 from storm.store import (
387@@ -389,8 +392,9 @@
388 # Watch out when changing this condition: make sure it's done in
389 # a way so that indexes are indeed hit when the query is executed.
390 # Also note that there is a NOT(in_use_clause) index.
391- in_use_clause = (
392- "(is_current_ubuntu IS TRUE OR is_current_upstream IS TRUE)")
393+ in_use_clause = Or(
394+ TranslationMessage.is_current_ubuntu,
395+ TranslationMessage.is_current_upstream)
396 # Present a list of language + usage constraints to sql. A language
397 # can either be unconstrained, used, or suggested depending on which
398 # of suggested_languages, used_languages it appears in.
399@@ -401,26 +405,16 @@
400 used_languages = used_languages - both_languages
401 lang_used = []
402 if both_languages:
403- lang_used.append('TranslationMessage.language IN %s' %
404- quote(both_languages))
405+ lang_used.append(
406+ TranslationMessage.languageID.is_in(both_languages))
407 if used_languages:
408- lang_used.append('(TranslationMessage.language IN %s AND %s)' % (
409- quote(used_languages), in_use_clause))
410+ lang_used.append(And(
411+ TranslationMessage.languageID.is_in(used_languages),
412+ in_use_clause))
413 if suggested_languages:
414- lang_used.append(
415- '(TranslationMessage.language IN %s AND NOT %s)' % (
416- quote(suggested_languages), in_use_clause))
417-
418- msgsets = SQL('''msgsets AS (
419- SELECT POTMsgSet.id
420- FROM POTMsgSet
421- JOIN TranslationTemplateItem ON
422- TranslationTemplateItem.potmsgset = POTMsgSet.id
423- JOIN SuggestivePOTemplate ON
424- TranslationTemplateItem.potemplate =
425- SuggestivePOTemplate.potemplate
426- WHERE POTMsgSet.msgid_singular = %s and POTMsgSet.id <> %s
427- )''' % sqlvalues(self.msgid_singular, self))
428+ lang_used.append(And(
429+ TranslationMessage.languageID.is_in(suggested_languages),
430+ Not(in_use_clause)))
431
432 # Subquery to find the ids of TranslationMessages that are
433 # matching suggestions.
434@@ -429,25 +423,23 @@
435 # excluding older messages that are identical to newer ones in
436 # all translated forms. The Python code can later sort out the
437 # distinct translations per form.
438- msgstrs = ', '.join([
439+ msgstrs = [
440 'COALESCE(msgstr%d, -1)' % form
441- for form in xrange(TranslationConstants.MAX_PLURAL_FORMS)])
442- ids_query_params = {
443- 'msgstrs': msgstrs,
444- 'where': '(' + ' OR '.join(lang_used) + ')',
445- }
446- ids_query = '''
447- SELECT DISTINCT ON (%(msgstrs)s)
448- TranslationMessage.id
449- FROM TranslationMessage
450- JOIN msgsets ON msgsets.id = TranslationMessage.potmsgset
451- WHERE %(where)s
452- ORDER BY %(msgstrs)s, date_created DESC
453- ''' % ids_query_params
454+ for form in xrange(TranslationConstants.MAX_PLURAL_FORMS)]
455+ ids_query = Select(
456+ TranslationMessage.id,
457+ tables=[TranslationMessage],
458+ where=And(
459+ TranslationMessage.msgid_singular == self.msgid_singular,
460+ TranslationMessage.potmsgset != self.id,
461+ TranslationMessage.suggestive,
462+ Or(*lang_used)),
463+ order_by=msgstrs + [Desc(TranslationMessage.date_created)],
464+ distinct=msgstrs)
465
466- result = IStore(TranslationMessage).with_(msgsets).find(
467+ result = IStore(TranslationMessage).find(
468 TranslationMessage,
469- TranslationMessage.id.is_in(SQL(ids_query)))
470+ TranslationMessage.id.is_in(ids_query))
471
472 return shortlist(result, longest_expected=100, hardlimit=2000)
473
474
475=== modified file 'lib/lp/translations/tests/test_suggestions.py'
476--- lib/lp/translations/tests/test_suggestions.py 2012-01-01 02:58:52 +0000
477+++ lib/lp/translations/tests/test_suggestions.py 2015-02-28 01:27:16 +0000
478@@ -162,7 +162,8 @@
479 text = "The application has exploded."
480 suggested_dutch = "De applicatie is ontploft."
481 now = datetime.now(timezone('UTC'))
482- before = now - timedelta(1, 1, 1)
483+ before = now - timedelta(1, 1, 2)
484+ after = now - timedelta(1, 1, 1)
485
486 foomsg = self.factory.makePOTMsgSet(self.foo_template, text)
487 barmsg = self.factory.makePOTMsgSet(self.bar_template, text)
488@@ -173,7 +174,7 @@
489 pofile=self.bar_nl, potmsgset=barmsg,
490 translations={0: suggested_dutch})
491 self.assertNotEqual(suggestion1, suggestion2)
492- removeSecurityProxy(suggestion1).date_created = before
493+ removeSecurityProxy(suggestion1).date_created = after
494 removeSecurityProxy(suggestion2).date_created = before
495
496 # When a third project, oof, contains the same translatable