Merge lp:~wgrant/launchpad/bug-736005-trivialise into lp:launchpad

Proposed by William Grant
Status: Work in progress
Proposed branch: lp:~wgrant/launchpad/bug-736005-trivialise
Merge into: lp:launchpad
Diff against target: 242 lines (+65/-119)
1 file modified
lib/lp/translations/model/pofile.py (+65/-119)
To merge this branch: bzr merge lp:~wgrant/launchpad/bug-736005-trivialise
Reviewer Review Type Date Requested Status
Launchpad code reviewers Pending
Review via email: mp+250447@code.launchpad.net

Commit message

Rewrite the POFile:+translate text search query to perform adequately.

Description of the change

Rewrite translations text search to use the simplest query that can work, which happens to optimise best nowadays. The old code is littered with comments describing how simpler queries degrade to seqscans of POMsgID and POTranslation, but that's not the case in 9.3 with a sensible simple query.

https://pastebin.canonical.com/126018/ compares the new and old queries. Further optimisation will come soon once POTMsgSet.msgid_(singular|plural) are denormalised to TranslationTemplateItem, and the COUNT(*) queries are eliminated from large template views.

To post a comment you must log in.
17361. By William Grant

Drop duplicate join condition.

17362. By William Grant

Fix distinct by including TTI in the result.

17363. By William Grant

Don't double-join POMsgID and POTranslation.

Unmerged revisions

17363. By William Grant

Don't double-join POMsgID and POTranslation.

17362. By William Grant

Fix distinct by including TTI in the result.

17361. By William Grant

Drop duplicate join condition.

17360. By William Grant

Distinctify.

17359. By William Grant

Reimplement XPI translation search.

17358. By William Grant

Support multiple plural forms in search rewrite.

17357. By William Grant

Rewrite translation search to the simplest form that happens to perform a lot better than the old planner workarounds.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'lib/lp/translations/model/pofile.py'
--- lib/lp/translations/model/pofile.py 2013-06-20 05:50:00 +0000
+++ lib/lp/translations/model/pofile.py 2015-02-21 03:06:32 +0000
@@ -13,6 +13,7 @@
13 ]13 ]
1414
15import datetime15import datetime
16from operator import itemgetter
1617
17import pytz18import pytz
18from sqlobject import (19from sqlobject import (
@@ -49,6 +50,7 @@
49from lp.registry.interfaces.person import validate_public_person50from lp.registry.interfaces.person import validate_public_person
50from lp.services.database.constants import UTC_NOW51from lp.services.database.constants import UTC_NOW
51from lp.services.database.datetimecol import UtcDateTimeCol52from lp.services.database.datetimecol import UtcDateTimeCol
53from lp.services.database.decoratedresultset import DecoratedResultSet
52from lp.services.database.interfaces import (54from lp.services.database.interfaces import (
53 IMasterStore,55 IMasterStore,
54 IStore,56 IStore,
@@ -56,7 +58,6 @@
56from lp.services.database.sqlbase import (58from lp.services.database.sqlbase import (
57 flush_database_updates,59 flush_database_updates,
58 quote,60 quote,
59 quote_like,
60 SQLBase,61 SQLBase,
61 sqlvalues,62 sqlvalues,
62 )63 )
@@ -96,6 +97,7 @@
96 credits_message_str,97 credits_message_str,
97 POTMsgSet,98 POTMsgSet,
98 )99 )
100from lp.translations.model.potranslation import POTranslation
99from lp.translations.model.translationimportqueue import collect_import_info101from lp.translations.model.translationimportqueue import collect_import_info
100from lp.translations.model.translationmessage import (102from lp.translations.model.translationmessage import (
101 make_plurals_sql_fragment,103 make_plurals_sql_fragment,
@@ -165,102 +167,6 @@
165 header.has_plural_forms = self.potemplate.hasPluralMessage()167 header.has_plural_forms = self.potemplate.hasPluralMessage()
166 return header168 return header
167169
168 def _getTranslationSearchQuery(self, pofile, plural_form, text):
169 """Query to find `text` in `plural_form` translations of a `pofile`.
170
171 This produces a list of clauses that can be used to search for
172 TranslationMessages containing `text` in their msgstr[`plural_form`].
173 Returned values are POTMsgSet ids containing them, expected to be
174 used in a UNION across all plural forms.
175 """
176 translation_match = """
177 -- Find translations containing `text`.
178 -- Like in findPOTMsgSetsContaining(), to avoid seqscans on
179 -- POTranslation table, we do ILIKE comparison on them in
180 -- a subselect which is first filtered by the POFile.
181 SELECT TranslationMessage.potmsgset
182 FROM TranslationMessage
183 JOIN TranslationTemplateItem
184 ON TranslationMessage.potmsgset
185 = TranslationTemplateItem.potmsgset
186 WHERE
187 TranslationTemplateItem.potemplate = %(potemplate)s AND
188 TranslationMessage.language = %(language)s AND
189 TranslationMessage.msgstr%(plural_form)d IN (
190 SELECT POTranslation.id FROM POTranslation WHERE
191 POTranslation.id IN (
192 SELECT DISTINCT(msgstr%(plural_form)d)
193 FROM TranslationMessage AS tm_ids
194 JOIN TranslationTemplateItem
195 ON tm_ids.potmsgset=TranslationTemplateItem.potmsgset
196 WHERE
197 TranslationTemplateItem.potemplate
198 = %(potemplate)s AND
199 TranslationTemplateItem.sequence > 0 AND
200 tm_ids.language=%(language)s
201 ) AND
202 POTranslation.translation
203 ILIKE '%%' || %(text)s || '%%')
204 """ % dict(potemplate=quote(pofile.potemplate),
205 language=quote(pofile.language),
206 plural_form=plural_form,
207 text=quote_like(text))
208 return translation_match
209
210 def _getTemplateSearchQuery(self, text):
211 """Query for finding `text` in msgids of this POFile.
212 """
213 english_match = """
214 -- Step 1a: get POTMsgSets where msgid_singular contains `text`
215 -- To avoid seqscans on POMsgID table (what LIKE usually
216 -- does), we do ILIKE comparison on them in a subselect first
217 -- filtered by this POTemplate.
218 SELECT POTMsgSet.id
219 FROM POTMsgSet
220 JOIN TranslationTemplateItem
221 ON TranslationTemplateItem.potmsgset=POTMsgSet.id AND
222 TranslationTemplateItem.potemplate=%s
223 WHERE
224 (POTMsgSet.msgid_singular IS NOT NULL AND
225 POTMsgSet.msgid_singular IN (
226 SELECT POMsgID.id FROM POMsgID
227 WHERE id IN (
228 SELECT DISTINCT(msgid_singular)
229 FROM POTMsgSet
230 JOIN TranslationTemplateItem
231 ON TranslationTemplateItem.potmsgset = POTMsgSet.id
232 WHERE
233 TranslationTemplateItem.potemplate=%s AND
234 TranslationTemplateItem.sequence > 0
235 ) AND
236 msgid ILIKE '%%' || %s || '%%'))
237 UNION
238 -- Step 1b: like above, just on msgid_plural.
239 SELECT POTMsgSet.id
240 FROM POTMsgSet
241 JOIN TranslationTemplateItem
242 ON TranslationTemplateItem.potmsgset=POTMsgSet.id AND
243 TranslationTemplateItem.potemplate=%s
244 WHERE
245 (POTMsgSet.msgid_plural IS NOT NULL AND
246 POTMsgSet.msgid_plural IN (
247 SELECT POMsgID.id FROM POMsgID
248 WHERE id IN (
249 SELECT DISTINCT(msgid_plural)
250 FROM POTMsgSet
251 JOIN TranslationTemplateItem
252 ON TranslationTemplateItem.potmsgset = POTMsgSet.id
253 WHERE
254 TranslationTemplateItem.potemplate=%s AND
255 TranslationTemplateItem.sequence > 0
256 ) AND
257 msgid ILIKE '%%' || %s || '%%'))
258 """ % (quote(self.potemplate), quote(self.potemplate),
259 quote_like(text),
260 quote(self.potemplate), quote(self.potemplate),
261 quote_like(text))
262 return english_match
263
264 def _getOrderedPOTMsgSets(self, origin_tables, query):170 def _getOrderedPOTMsgSets(self, origin_tables, query):
265 """Find all POTMsgSets matching `query` from `origin_tables`.171 """Find all POTMsgSets matching `query` from `origin_tables`.
266172
@@ -271,41 +177,81 @@
271 POTMsgSet, SQL(query))177 POTMsgSet, SQL(query))
272 return results.order_by(TranslationTemplateItem.sequence)178 return results.order_by(TranslationTemplateItem.sequence)
273179
180 def _getTranslationSearchBits(self, pofile):
181 ThisTM = ClassAlias(TranslationMessage)
182 ThisPT = ClassAlias(POTranslation)
183 str_id_cols = [
184 getattr(ThisTM, 'msgstr%dID' % plural_form)
185 for plural_form in range(pofile.plural_forms)]
186 origin = [
187 LeftJoin(
188 ThisTM,
189 And(
190 ThisTM.potmsgsetID ==
191 TranslationTemplateItem.potmsgsetID,
192 ThisTM.languageID == pofile.languageID)),
193 LeftJoin(
194 ThisPT,
195 Or(*(ThisPT.id == col for col in str_id_cols))),
196 ]
197 search_options = [ThisPT.translation]
198 return origin, search_options
199
274 def findPOTMsgSetsContaining(self, text):200 def findPOTMsgSetsContaining(self, text):
275 """See `IPOFile`."""201 """See `IPOFile`."""
202 origin = [
203 POTMsgSet,
204 Join(
205 TranslationTemplateItem,
206 TranslationTemplateItem.potmsgsetID == POTMsgSet.id),
207 ]
276 clauses = [208 clauses = [
277 'TranslationTemplateItem.potemplate = %s' % sqlvalues(209 TranslationTemplateItem.potemplateID == self.potemplate.id,
278 self.potemplate),210 TranslationTemplateItem.sequence > 0,
279 'TranslationTemplateItem.potmsgset = POTMsgSet.id',
280 'TranslationTemplateItem.sequence > 0',
281 ]211 ]
282212
283 if text is not None:213 if text is not None:
284 assert len(text) > 1, (214 assert len(text) > 1, (
285 "You can not search for strings shorter than 2 characters.")215 "You can not search for strings shorter than 2 characters.")
286216
217 # A list of string columns, of which at least one must match
218 # the search term.
219 search_options = []
220
221 # Search the English strings.
287 if self.potemplate.uses_english_msgids:222 if self.potemplate.uses_english_msgids:
288 english_match = self._getTemplateSearchQuery(text)223 origin.extend([
224 Join(
225 POMsgID,
226 Or(
227 POMsgID.id == POTMsgSet.msgid_singularID,
228 POMsgID.id == POTMsgSet.msgid_pluralID)),
229 ])
230 search_options.extend([POMsgID.msgid])
289 else:231 else:
290 # If msgids are not in English, use English PO file232 en_po = self.potemplate.getPOFileByLang('en')
291 # to fetch original strings instead.233 en_origin, en_options = self._getTranslationSearchBits(en_po)
292 en_pofile = self.potemplate.getPOFileByLang('en')234 origin.extend(en_origin)
293 english_match = self._getTranslationSearchQuery(235 search_options.extend(en_options)
294 en_pofile, 0, text)
295236
296 # Do not look for translations in a DummyPOFile.237 # Search the translations themselves, unless this is a
297 search_clauses = [english_match]238 # DummyPOFile.
298 if self.id is not None:239 if self.id is not None:
299 for plural_form in range(self.plural_forms):240 po_origin, po_options = self._getTranslationSearchBits(self)
300 translation_match = self._getTranslationSearchQuery(241 origin.extend(po_origin)
301 self, plural_form, text)242 search_options.extend(po_options)
302 search_clauses.append(translation_match)243
303244 # Case-insensitively substring-match the search term against
304 clauses.append(245 # each searchable column.
305 "POTMsgSet.id IN (" + " UNION ".join(search_clauses) + ")")246 clauses.append(Or(*(
306247 str.like('%%%s%%' % text, case_sensitive=False)
307 return self._getOrderedPOTMsgSets(248 for str in search_options)))
308 [POTMsgSet, TranslationTemplateItem], ' AND '.join(clauses))249
250 result = IMasterStore(POTMsgSet).using(*origin).find(
251 (POTMsgSet, TranslationTemplateItem),
252 *clauses).config(distinct=True).order_by(
253 TranslationTemplateItem.sequence)
254 return DecoratedResultSet(result, itemgetter(0))
309255
310 def getFullLanguageCode(self):256 def getFullLanguageCode(self):
311 """See `IPOFile`."""257 """See `IPOFile`."""