Merge lp:~wgrant/launchpad/tm-performance-4-queries into lp:launchpad

Proposed by William Grant
Status: Work in progress
Proposed branch: lp:~wgrant/launchpad/tm-performance-4-queries
Merge into: lp:launchpad
Prerequisite: lp:~wgrant/launchpad/tm-performance-2-model
Diff against target: 496 lines (+105/-210)
3 files modified
lib/lp/translations/model/pofile.py (+74/-172)
lib/lp/translations/model/potmsgset.py (+28/-36)
lib/lp/translations/tests/test_suggestions.py (+3/-2)
To merge this branch: bzr merge lp:~wgrant/launchpad/tm-performance-4-queries
Reviewer Review Type Date Requested Status
Launchpad code reviewers Pending
Review via email: mp+251345@code.launchpad.net
To post a comment you must log in.
17375. By William Grant

Drop unused functions.

17376. By William Grant

Merge bug-736005-trivialise.

17377. By William Grant

Text searches now use TTI.msgid_*.

17378. By William Grant

POFile POTMsgSet filters now use TTI denormed columns.

Unmerged revisions

17378. By William Grant

POFile POTMsgSet filters now use TTI denormed columns.

17377. By William Grant

Text searches now use TTI.msgid_*.

17376. By William Grant

Merge bug-736005-trivialise.

17375. By William Grant

Drop unused functions.

17374. By William Grant

Merge tm-performance-2-model.

17373. By William Grant

Translation suggestion queries now use the denormed columns.

17372. By William Grant

Fix POTMsgSet.suggestive clone and tests.

17371. By William Grant

Set POTemplate.suggestive based on normal suggestiveness criteria, and add POTemplateSet.filterSuggestiveTargets with the same.

17370. By William Grant

Propagate suggestive down from POTemplate to POTMsgSet to TranslationMessage.

17369. By William Grant

submitSuggestion now uses _makeTranslationMessage.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/lp/translations/model/pofile.py'
2--- lib/lp/translations/model/pofile.py 2015-02-27 22:58:59 +0000
3+++ lib/lp/translations/model/pofile.py 2015-02-28 01:27:16 +0000
4@@ -13,6 +13,7 @@
5 ]
6
7 import datetime
8+from operator import itemgetter
9
10 import pytz
11 from sqlobject import (
12@@ -50,6 +51,7 @@
13 from lp.registry.interfaces.person import validate_public_person
14 from lp.services.database.constants import UTC_NOW
15 from lp.services.database.datetimecol import UtcDateTimeCol
16+from lp.services.database.decoratedresultset import DecoratedResultSet
17 from lp.services.database.interfaces import (
18 IMasterStore,
19 IStore,
20@@ -57,7 +59,6 @@
21 from lp.services.database.sqlbase import (
22 flush_database_updates,
23 quote,
24- quote_like,
25 SQLBase,
26 sqlvalues,
27 )
28@@ -97,11 +98,9 @@
29 credits_message_str,
30 POTMsgSet,
31 )
32+from lp.translations.model.potranslation import POTranslation
33 from lp.translations.model.translationimportqueue import collect_import_info
34-from lp.translations.model.translationmessage import (
35- make_plurals_sql_fragment,
36- TranslationMessage,
37- )
38+from lp.translations.model.translationmessage import TranslationMessage
39 from lp.translations.model.translationtemplateitem import (
40 TranslationTemplateItem,
41 )
42@@ -112,17 +111,6 @@
43 )
44
45
46-def compose_sql_translationmessage_has_translations(tm_sql_identifier):
47- """Compose SQL for "`TranslationMessage` is nonempty.".
48-
49- :param tm_sql_identifier: The SQL identifier for the
50- `TranslationMessage` in the query that's to be tested.
51- """
52- return "COALESCE(%s) IS NOT NULL" % ", ".join([
53- "%s.msgstr%d" % (tm_sql_identifier, form)
54- for form in xrange(TranslationConstants.MAX_PLURAL_FORMS)])
55-
56-
57 class POFileMixIn(RosettaStats):
58 """Base class for `POFile` and `DummyPOFile`.
59
60@@ -166,102 +154,6 @@
61 header.has_plural_forms = self.potemplate.hasPluralMessage()
62 return header
63
64- def _getTranslationSearchQuery(self, pofile, plural_form, text):
65- """Query to find `text` in `plural_form` translations of a `pofile`.
66-
67- This produces a list of clauses that can be used to search for
68- TranslationMessages containing `text` in their msgstr[`plural_form`].
69- Returned values are POTMsgSet ids containing them, expected to be
70- used in a UNION across all plural forms.
71- """
72- translation_match = """
73- -- Find translations containing `text`.
74- -- Like in findPOTMsgSetsContaining(), to avoid seqscans on
75- -- POTranslation table, we do ILIKE comparison on them in
76- -- a subselect which is first filtered by the POFile.
77- SELECT TranslationMessage.potmsgset
78- FROM TranslationMessage
79- JOIN TranslationTemplateItem
80- ON TranslationMessage.potmsgset
81- = TranslationTemplateItem.potmsgset
82- WHERE
83- TranslationTemplateItem.potemplate = %(potemplate)s AND
84- TranslationMessage.language = %(language)s AND
85- TranslationMessage.msgstr%(plural_form)d IN (
86- SELECT POTranslation.id FROM POTranslation WHERE
87- POTranslation.id IN (
88- SELECT DISTINCT(msgstr%(plural_form)d)
89- FROM TranslationMessage AS tm_ids
90- JOIN TranslationTemplateItem
91- ON tm_ids.potmsgset=TranslationTemplateItem.potmsgset
92- WHERE
93- TranslationTemplateItem.potemplate
94- = %(potemplate)s AND
95- TranslationTemplateItem.sequence > 0 AND
96- tm_ids.language=%(language)s
97- ) AND
98- POTranslation.translation
99- ILIKE '%%' || %(text)s || '%%')
100- """ % dict(potemplate=quote(pofile.potemplate),
101- language=quote(pofile.language),
102- plural_form=plural_form,
103- text=quote_like(text))
104- return translation_match
105-
106- def _getTemplateSearchQuery(self, text):
107- """Query for finding `text` in msgids of this POFile.
108- """
109- english_match = """
110- -- Step 1a: get POTMsgSets where msgid_singular contains `text`
111- -- To avoid seqscans on POMsgID table (what LIKE usually
112- -- does), we do ILIKE comparison on them in a subselect first
113- -- filtered by this POTemplate.
114- SELECT POTMsgSet.id
115- FROM POTMsgSet
116- JOIN TranslationTemplateItem
117- ON TranslationTemplateItem.potmsgset=POTMsgSet.id AND
118- TranslationTemplateItem.potemplate=%s
119- WHERE
120- (POTMsgSet.msgid_singular IS NOT NULL AND
121- POTMsgSet.msgid_singular IN (
122- SELECT POMsgID.id FROM POMsgID
123- WHERE id IN (
124- SELECT DISTINCT(POTMsgSet.msgid_singular)
125- FROM POTMsgSet
126- JOIN TranslationTemplateItem
127- ON TranslationTemplateItem.potmsgset = POTMsgSet.id
128- WHERE
129- TranslationTemplateItem.potemplate=%s AND
130- TranslationTemplateItem.sequence > 0
131- ) AND
132- msgid ILIKE '%%' || %s || '%%'))
133- UNION
134- -- Step 1b: like above, just on msgid_plural.
135- SELECT POTMsgSet.id
136- FROM POTMsgSet
137- JOIN TranslationTemplateItem
138- ON TranslationTemplateItem.potmsgset=POTMsgSet.id AND
139- TranslationTemplateItem.potemplate=%s
140- WHERE
141- (POTMsgSet.msgid_plural IS NOT NULL AND
142- POTMsgSet.msgid_plural IN (
143- SELECT POMsgID.id FROM POMsgID
144- WHERE id IN (
145- SELECT DISTINCT(POTMsgSet.msgid_plural)
146- FROM POTMsgSet
147- JOIN TranslationTemplateItem
148- ON TranslationTemplateItem.potmsgset = POTMsgSet.id
149- WHERE
150- TranslationTemplateItem.potemplate=%s AND
151- TranslationTemplateItem.sequence > 0
152- ) AND
153- msgid ILIKE '%%' || %s || '%%'))
154- """ % (quote(self.potemplate), quote(self.potemplate),
155- quote_like(text),
156- quote(self.potemplate), quote(self.potemplate),
157- quote_like(text))
158- return english_match
159-
160 def _getOrderedPOTMsgSets(self, origin_tables, query):
161 """Find all POTMsgSets matching `query` from `origin_tables`.
162
163@@ -274,41 +166,79 @@
164 POTMsgSet, query)
165 return results.order_by(TranslationTemplateItem.sequence)
166
167+ def _getTranslationSearchBits(self, pofile):
168+ ThisTM = ClassAlias(TranslationMessage)
169+ ThisPT = ClassAlias(POTranslation)
170+ str_id_cols = [
171+ getattr(ThisTM, 'msgstr%dID' % plural_form)
172+ for plural_form in range(pofile.plural_forms)]
173+ origin = [
174+ LeftJoin(
175+ ThisTM,
176+ And(
177+ ThisTM.potmsgsetID ==
178+ TranslationTemplateItem.potmsgsetID,
179+ ThisTM.languageID == pofile.languageID)),
180+ LeftJoin(
181+ ThisPT,
182+ ThisPT.id.is_in(str_id_cols)),
183+ ]
184+ search_options = [ThisPT.translation]
185+ return origin, search_options
186+
187 def findPOTMsgSetsContaining(self, text):
188 """See `IPOFile`."""
189+ origin = [
190+ POTMsgSet,
191+ Join(
192+ TranslationTemplateItem,
193+ TranslationTemplateItem.potmsgsetID == POTMsgSet.id),
194+ ]
195 clauses = [
196- 'TranslationTemplateItem.potemplate = %s' % sqlvalues(
197- self.potemplate),
198- 'TranslationTemplateItem.potmsgset = POTMsgSet.id',
199- 'TranslationTemplateItem.sequence > 0',
200+ TranslationTemplateItem.potemplateID == self.potemplate.id,
201+ TranslationTemplateItem.sequence > 0,
202 ]
203
204 if text is not None:
205 assert len(text) > 1, (
206 "You can not search for strings shorter than 2 characters.")
207
208+ # A list of string columns, of which at least one must match
209+ # the search term.
210+ search_options = []
211+
212+ # Search the English strings.
213 if self.potemplate.uses_english_msgids:
214- english_match = self._getTemplateSearchQuery(text)
215+ origin.append(Join(
216+ POMsgID,
217+ POMsgID.id.is_in((
218+ TranslationTemplateItem.msgid_singularID,
219+ TranslationTemplateItem.msgid_pluralID))))
220+ search_options.extend([POMsgID.msgid])
221 else:
222- # If msgids are not in English, use English PO file
223- # to fetch original strings instead.
224- en_pofile = self.potemplate.getPOFileByLang('en')
225- english_match = self._getTranslationSearchQuery(
226- en_pofile, 0, text)
227+ en_po = self.potemplate.getPOFileByLang('en')
228+ en_origin, en_options = self._getTranslationSearchBits(en_po)
229+ origin.extend(en_origin)
230+ search_options.extend(en_options)
231
232- # Do not look for translations in a DummyPOFile.
233- search_clauses = [english_match]
234+ # Search the translations themselves, unless this is a
235+ # DummyPOFile.
236 if self.id is not None:
237- for plural_form in range(self.plural_forms):
238- translation_match = self._getTranslationSearchQuery(
239- self, plural_form, text)
240- search_clauses.append(translation_match)
241-
242- clauses.append(
243- "POTMsgSet.id IN (" + " UNION ".join(search_clauses) + ")")
244-
245- return self._getOrderedPOTMsgSets(
246- [POTMsgSet, TranslationTemplateItem], ' AND '.join(clauses))
247+ po_origin, po_options = self._getTranslationSearchBits(self)
248+ origin.extend(po_origin)
249+ search_options.extend(po_options)
250+
251+ # Case-insensitively substring-match the search term against
252+ # each searchable column.
253+ clauses.append(Or(*(
254+ str.like('%%%s%%' % text, case_sensitive=False)
255+ for str in search_options)))
256+
257+ result = IMasterStore(POTMsgSet).using(*origin).find(
258+ (POTMsgSet, TranslationTemplateItem),
259+ *clauses).config(distinct=True).order_by(
260+ TranslationTemplateItem.sequence)
261+ return DecoratedResultSet(result, itemgetter(0))
262
263 def getFullLanguageCode(self):
264 """See `IPOFile`."""
265@@ -529,25 +459,6 @@
266 "Calling prepareTranslationCredits on a message with "
267 "unknown credits type '%s'." % credits_type.title)
268
269- def _getClausesForPOFileMessages(self, current=True):
270- """Get TranslationMessages for the POFile via TranslationTemplateItem.
271-
272- Call-site will have to have appropriate clauseTables.
273- """
274- # When all the code that uses this method is moved to Storm,
275- # we can replace it with _getStormClausesForPOFileMessages
276- # and then remove it.
277- clauses = [
278- 'TranslationTemplateItem.potemplate = %s' % sqlvalues(
279- self.potemplate),
280- ('TranslationTemplateItem.potmsgset'
281- ' = TranslationMessage.potmsgset'),
282- 'TranslationMessage.language = %s' % sqlvalues(self.language)]
283- if current:
284- clauses.append('TranslationTemplateItem.sequence > 0')
285-
286- return clauses
287-
288 def _getStormClausesForPOFileMessages(self, current=True):
289 """Get TranslationMessages for the POFile via TranslationTemplateItem.
290 """
291@@ -627,20 +538,10 @@
292 """See `IPOFile`."""
293 # We get all POTMsgSet.ids with translations, and later
294 # exclude them using a NOT IN subselect.
295- translated_clauses, clause_tables = self._getTranslatedMessagesQuery()
296+ trans_clauses, trans_tables = self._getTranslatedMessagesQuery()
297 translated_query = Select(
298- POTMsgSet.id,
299- tables=[TranslationTemplateItem, TranslationMessage, POTMsgSet],
300- where=And(
301- # Even though this seems silly, Postgres prefers
302- # TranslationTemplateItem index if we add it (and on
303- # staging we get more than a 10x speed improvement: from
304- # 8s to 0.7s). We also need to put it before any other
305- # clauses to be actually useful.
306- TranslationTemplateItem.potmsgsetID ==
307- TranslationTemplateItem.potmsgsetID,
308- POTMsgSet.id == TranslationTemplateItem.potmsgsetID,
309- *translated_clauses))
310+ TranslationTemplateItem.potmsgsetID,
311+ tables=trans_tables, where=And(*trans_clauses))
312 clauses = [
313 TranslationTemplateItem.potemplateID == self.potemplate.id,
314 TranslationTemplateItem.potmsgsetID == POTMsgSet.id,
315@@ -669,7 +570,7 @@
316 Coalesce(Diverged.date_reviewed, Diverged.date_created),
317 tables=[Diverged],
318 where=And(
319- Diverged.potmsgsetID == POTMsgSet.id,
320+ Diverged.potmsgsetID == TranslationTemplateItem.potmsgsetID,
321 Diverged.languageID == self.language.id,
322 getattr(Diverged, flag_name),
323 Diverged.potemplateID == self.potemplate.id))
324@@ -679,7 +580,7 @@
325 Coalesce(Shared.date_reviewed, Shared.date_created),
326 tables=[Shared],
327 where=And(
328- Shared.potmsgsetID == POTMsgSet.id,
329+ Shared.potmsgsetID == TranslationTemplateItem.potmsgsetID,
330 Shared.languageID == self.language.id,
331 getattr(Shared, flag_name),
332 Shared.potemplateID == None))
333@@ -694,7 +595,7 @@
334 # A POT set has "new" suggestions if there is a non current
335 # TranslationMessage newer than the current reviewed one.
336 query = And(
337- POTMsgSet.id.is_in(
338+ TranslationTemplateItem.potmsgsetID.is_in(
339 Select(
340 TranslationMessage.potmsgsetID,
341 tables=[
342@@ -736,7 +637,7 @@
343 Diverged.potemplateID == self.potemplate.id))))
344 imported_clauses = [
345 Imported.id != TranslationMessage.id,
346- Imported.potmsgsetID == POTMsgSet.id,
347+ Imported.potmsgsetID == TranslationTemplateItem.potmsgsetID,
348 Imported.languageID == self.language.id,
349 getattr(Imported, other_side_flag_name),
350 Or(
351@@ -798,7 +699,8 @@
352 'table_name': table_name,
353 } for plural_form in range(1, self.plural_forms))
354 query.append(
355- '(POTMsgSet.msgid_plural IS NULL OR (%s))' % plurals_query)
356+ '(%(table_name)s.msgid_plural IS NULL OR (%(plurals_query)s))'
357+ % {'plurals_query': plurals_query, 'table_name': table_name})
358 return query
359
360 def _countTranslations(self):
361@@ -1531,7 +1433,7 @@
362 clauses = [
363 TranslationTemplateItem.potemplateID == POFile.potemplateID,
364 POTMsgSet.id == TranslationTemplateItem.potmsgsetID,
365- POTMsgSet.msgid_singular == POMsgID.id,
366+ TranslationTemplateItem.msgid_singular == POMsgID.id,
367 POMsgID.msgid.is_in(POTMsgSet.credits_message_ids),
368 ]
369 if untranslated:
370
371=== modified file 'lib/lp/translations/model/potmsgset.py'
372--- lib/lp/translations/model/potmsgset.py 2015-02-28 01:27:16 +0000
373+++ lib/lp/translations/model/potmsgset.py 2015-02-28 01:27:16 +0000
374@@ -21,9 +21,12 @@
375 StringCol,
376 )
377 from storm.expr import (
378+ And,
379 Coalesce,
380 Desc,
381+ Not,
382 Or,
383+ Select,
384 SQL,
385 )
386 from storm.store import (
387@@ -389,8 +392,9 @@
388 # Watch out when changing this condition: make sure it's done in
389 # a way so that indexes are indeed hit when the query is executed.
390 # Also note that there is a NOT(in_use_clause) index.
391- in_use_clause = (
392- "(is_current_ubuntu IS TRUE OR is_current_upstream IS TRUE)")
393+ in_use_clause = Or(
394+ TranslationMessage.is_current_ubuntu,
395+ TranslationMessage.is_current_upstream)
396 # Present a list of language + usage constraints to sql. A language
397 # can either be unconstrained, used, or suggested depending on which
398 # of suggested_languages, used_languages it appears in.
399@@ -401,26 +405,16 @@
400 used_languages = used_languages - both_languages
401 lang_used = []
402 if both_languages:
403- lang_used.append('TranslationMessage.language IN %s' %
404- quote(both_languages))
405+ lang_used.append(
406+ TranslationMessage.languageID.is_in(both_languages))
407 if used_languages:
408- lang_used.append('(TranslationMessage.language IN %s AND %s)' % (
409- quote(used_languages), in_use_clause))
410+ lang_used.append(And(
411+ TranslationMessage.languageID.is_in(used_languages),
412+ in_use_clause))
413 if suggested_languages:
414- lang_used.append(
415- '(TranslationMessage.language IN %s AND NOT %s)' % (
416- quote(suggested_languages), in_use_clause))
417-
418- msgsets = SQL('''msgsets AS (
419- SELECT POTMsgSet.id
420- FROM POTMsgSet
421- JOIN TranslationTemplateItem ON
422- TranslationTemplateItem.potmsgset = POTMsgSet.id
423- JOIN SuggestivePOTemplate ON
424- TranslationTemplateItem.potemplate =
425- SuggestivePOTemplate.potemplate
426- WHERE POTMsgSet.msgid_singular = %s and POTMsgSet.id <> %s
427- )''' % sqlvalues(self.msgid_singular, self))
428+ lang_used.append(And(
429+ TranslationMessage.languageID.is_in(suggested_languages),
430+ Not(in_use_clause)))
431
432 # Subquery to find the ids of TranslationMessages that are
433 # matching suggestions.
434@@ -429,25 +423,23 @@
435 # excluding older messages that are identical to newer ones in
436 # all translated forms. The Python code can later sort out the
437 # distinct translations per form.
438- msgstrs = ', '.join([
439+ msgstrs = [
440 'COALESCE(msgstr%d, -1)' % form
441- for form in xrange(TranslationConstants.MAX_PLURAL_FORMS)])
442- ids_query_params = {
443- 'msgstrs': msgstrs,
444- 'where': '(' + ' OR '.join(lang_used) + ')',
445- }
446- ids_query = '''
447- SELECT DISTINCT ON (%(msgstrs)s)
448- TranslationMessage.id
449- FROM TranslationMessage
450- JOIN msgsets ON msgsets.id = TranslationMessage.potmsgset
451- WHERE %(where)s
452- ORDER BY %(msgstrs)s, date_created DESC
453- ''' % ids_query_params
454+ for form in xrange(TranslationConstants.MAX_PLURAL_FORMS)]
455+ ids_query = Select(
456+ TranslationMessage.id,
457+ tables=[TranslationMessage],
458+ where=And(
459+ TranslationMessage.msgid_singular == self.msgid_singular,
460+ TranslationMessage.potmsgset != self.id,
461+ TranslationMessage.suggestive,
462+ Or(*lang_used)),
463+ order_by=msgstrs + [Desc(TranslationMessage.date_created)],
464+ distinct=msgstrs)
465
466- result = IStore(TranslationMessage).with_(msgsets).find(
467+ result = IStore(TranslationMessage).find(
468 TranslationMessage,
469- TranslationMessage.id.is_in(SQL(ids_query)))
470+ TranslationMessage.id.is_in(ids_query))
471
472 return shortlist(result, longest_expected=100, hardlimit=2000)
473
474
475=== modified file 'lib/lp/translations/tests/test_suggestions.py'
476--- lib/lp/translations/tests/test_suggestions.py 2012-01-01 02:58:52 +0000
477+++ lib/lp/translations/tests/test_suggestions.py 2015-02-28 01:27:16 +0000
478@@ -162,7 +162,8 @@
479 text = "The application has exploded."
480 suggested_dutch = "De applicatie is ontploft."
481 now = datetime.now(timezone('UTC'))
482- before = now - timedelta(1, 1, 1)
483+ before = now - timedelta(1, 1, 2)
484+ after = now - timedelta(1, 1, 1)
485
486 foomsg = self.factory.makePOTMsgSet(self.foo_template, text)
487 barmsg = self.factory.makePOTMsgSet(self.bar_template, text)
488@@ -173,7 +174,7 @@
489 pofile=self.bar_nl, potmsgset=barmsg,
490 translations={0: suggested_dutch})
491 self.assertNotEqual(suggestion1, suggestion2)
492- removeSecurityProxy(suggestion1).date_created = before
493+ removeSecurityProxy(suggestion1).date_created = after
494 removeSecurityProxy(suggestion2).date_created = before
495
496 # When a third project, oof, contains the same translatable