Hi Jeroen, This branch is a nice clean up. I've got a few comments on style. I'm curious also about your/Danilo's choice to re-open a bug that was fix-committed to make these clean-up changes. Seems like an odd way to handle it, but not really any of my concern here. > === modified file 'lib/lp/translations/doc/message-sharing-merge-script.txt' > --- lib/lp/translations/doc/message-sharing-merge-script.txt 2009-07-06 08:04:08 +0000 > +++ lib/lp/translations/doc/message-sharing-merge-script.txt 2009-08-04 13:37:57 +0000 > @@ -15,3 +15,10 @@ > > >>> retcode > 0 > + > + > +# The script modified the database, even though the database layer may > +# not have noticed it. > + > + >>> from canonical.launchpad.ftests.harness import LaunchpadTestSetup > + >>> LaunchpadTestSetup().force_dirty_database() > === modified file 'lib/lp/translations/scripts/message_sharing_migration.py' > --- lib/lp/translations/scripts/message_sharing_migration.py 2009-07-19 04:41:14 +0000 > +++ lib/lp/translations/scripts/message_sharing_migration.py 2009-08-05 11:44:49 +0000 Would you mind fixing the __all__ line to make it multi-line (is that our convention when there is just one entry?) or remove the extra spaces. > @@ -8,8 +8,9 @@ > from zope.component import getUtility > from zope.security.proxy import removeSecurityProxy > > +from canonical.launchpad.utilities.orderingcheck import OrderingCheck > from lp.translations.interfaces.potemplate import IPOTemplateSet > -from canonical.launchpad.utilities.orderingcheck import OrderingCheck > +from lp.translations.interfaces.translations import TranslationConstants > from lp.registry.interfaces.product import IProductSet > from lp.registry.interfaces.sourcepackagename import ISourcePackageNameSet > from lp.services.scripts.base import ( > @@ -146,6 +147,8 @@ > > class MessageSharingMerge(LaunchpadScript): > > + templateset = None How about 'template_set' as it follows our coding standard? > def add_my_options(self): > self.parser.add_option('-d', '--distribution', dest='distribution', > help="Distribution to merge messages for.") > @@ -166,6 +169,16 @@ > action='store_true', > help="Dry run, don't really make any changes.") > > + def _setUpUtilities(self): > + """Prepare a few members that several methods need. > + > + Calling this again later does nothing. > + """ > + if self.templateset is None: > + self.templateset = getUtility(IPOTemplateSet) > + self.compare_template_precedence = ( > + self.templateset.compareSharingPrecedence) > + > def main(self): > actions = ( > self.options.merge_potmsgsets or > @@ -214,7 +227,9 @@ > "Unknown source package name: '%s'" % > self.options.sourcepackage) > > - subset = getUtility(IPOTemplateSet).getSharingSubset( > + self._setUpUtilities() > + > + subset = self.templateset.getSharingSubset( > product=product, distribution=distribution, > sourcepackagename=sourcepackagename) > equivalence_classes = subset.groupEquivalentPOTemplates( > @@ -250,6 +265,7 @@ > > def _mergePOTMsgSets(self, potemplates): > """Merge POTMsgSets for given sequence of sharing templates.""" > + self._setUpUtilities() > > # Map each POTMsgSet key (context, msgid, plural) to its > # representative POTMsgSet. > @@ -267,8 +283,7 @@ > # through the templates, starting at the most representative and > # moving towards the least representative. For any unique potmsgset > # key we find, the first POTMsgSet is the representative one. > - templateset = getUtility(IPOTemplateSet) > - order_check = OrderingCheck(cmp=templateset.compareSharingPrecedence) > + order_check = OrderingCheck(cmp=self.compare_template_precedence) > > for template in potemplates: > order_check.check(template) > @@ -288,13 +303,7 @@ > # translation messages. We don't need do this for subordinates > # because the algorithm will refuse to add new duplicates to the > # representative POTMsgSet anyway. > - existing_tms = self._scrubPOTMsgSetTranslations(representative) > - > - # Keep track of current/imported TranslationMessages for > - # representative. These will matter when we try to merge > - # subordinate TranslationMessages into the representative, some > - # of which may be current or imported as well. > - current_tms, imported_tms = self._findUsedMessages(existing_tms) > + self._scrubPOTMsgSetTranslations(representative) > > seen_potmsgsets = set([representative]) > > @@ -309,8 +318,8 @@ > message = removeSecurityProxy(message) > > clashing_current, clashing_imported, twin = ( > - self._findClashesFromDicts( > - existing_tms, current_tms, imported_tms, message)) > + self._findClashes( > + message, representative, message.potemplate)) > > if clashing_current or clashing_imported: > saved = self._saveByDiverging( > @@ -345,8 +354,8 @@ > > def _mergeTranslationMessages(self, potemplates): > """Share `TranslationMessage`s between templates where possible.""" > - templateset = getUtility(IPOTemplateSet) > - order_check = OrderingCheck(cmp=templateset.compareSharingPrecedence) > + self._setUpUtilities() > + order_check = OrderingCheck(cmp=self.compare_template_precedence) > for template in potemplates: > order_check.check(template) > for potmsgset in template.getPOTMsgSets(False): > @@ -361,7 +370,13 @@ > potmsgset (because we start out with one) and potemplate (because > that's sorted out in the nested dicts). > """ > - return (tm.language, tm.variant) + tuple(tm.all_msgstrs) > + tm = removeSecurityProxy(tm) > + msgstr_ids = [ > + getattr(tm, 'msgstr%dID' % form) > + for form in xrange(TranslationConstants.MAX_PLURAL_FORMS) > + ] The ] should be dedented by 4. > + return (tm.language, tm.variant) + tuple(msgstr_ids) > > def _mapExistingMessages(self, potmsgset): > """Map out the existing TranslationMessages for `potmsgset`. > @@ -387,33 +402,6 @@ > > return existing_tms > > - def _findUsedMessages(self, translations): > - """Fish current and imported messages out of translations dict. > - > - :param translations: a dict as returned by > - `_mapExistingMessages` or `_scrubPOTMsgSetTranslations`. All > - information is taken from this dict; no other messages are > - considered. > - :return: a pair of dicts, each mapping (potemplate, language, > - variant) to a TranslationMessage or to None. The dicts map > - out current and imported messages, respectively. Shared > - messages have a POTemplate of None. > - """ > - current_messages = {} > - imported_messages = {} > - for per_template in translations.itervalues(): > - for tms in per_template.itervalues(): > - assert len(tms) == 1, ( > - "Unexpected list size: %d items." % len(tms)) > - tm = tms[0] > - key = (tm.potemplate, tm.language, tm.variant) > - if tm.is_current: > - current_messages[key] = tm > - if tm.is_imported: > - imported_messages[key] = tm > - > - return current_messages, imported_messages > - > def _scrubPOTMsgSetTranslations(self, potmsgset): > """Map out translations for `potmsgset`, and eliminate duplicates. > > @@ -487,41 +475,6 @@ > # message can be merged into the twin without problems. > return filter_clashes(clashing_current, clashing_imported, twin) > > - def _findClashesFromDicts(self, existing_tms, current_tms, imported_tms, > - message): > - """Like `_findClashes`, but from cached dicts. > - > - This saves some database querying for the common case. > - > - :param existing_tms: a dict as returned by > - `_mapExistingMessages`. > - :param current_tms: a dict as returned by `_findUsedMessages`. > - """ > - twins_dict = existing_tms.get( > - self._getPOTMsgSetTranslationMessageKey(message)) > - if twins_dict is None: > - twin = None > - else: > - twins = twins_dict.get(message.potemplate) > - if twins is None: > - twin = None > - else: > - assert len(twins) == 1, "Did not expect %d twins." % ( > - len(twins)) > - twin = twins[0] > - > - subkey = message.potemplate, message.language, message.variant > - if message.is_current: > - clashing_current = current_tms.get(subkey) > - else: > - clashing_current = None > - if message.is_imported: > - clashing_imported = imported_tms.get(subkey) > - else: > - clashing_imported = None > - > - return filter_clashes(clashing_current, clashing_imported, twin) > - > def _saveByDiverging(self, message, target_potmsgset, source_potmsgset): > """Avoid a TranslationMessage clash during POTMsgSet merge. > > @@ -539,8 +492,10 @@ > # This message was shared. Maybe we can still save it for at > # least one template by making it diverged. > target_ttis = source_potmsgset.getAllTranslationTemplateItems() > - for tti in target_ttis: > - if self._divergeTo(message, target_potmsgset, tti.potemplate): > + target_templates = [tti.potemplate for tti in target_ttis] > + target_templates.sort(self.compare_template_precedence) > + for template in target_templates: > + if self._divergeTo(message, target_potmsgset, template): > return True > > # No, there's no place where this message can be preserved. It'll > === modified file 'lib/lp/translations/scripts/tests/test_message_sharing_migration.py' > --- lib/lp/translations/scripts/tests/test_message_sharing_migration.py 2009-07-19 04:41:14 +0000 > +++ lib/lp/translations/scripts/tests/test_message_sharing_migration.py 2009-08-05 11:44:49 +0000 > @@ -41,7 +41,7 @@ > self.stable_template.iscurrent = False > self.templates = [self.trunk_template, self.stable_template] > > - self.script = MessageSharingMerge('tms-merging-test') > + self.script = MessageSharingMerge('tms-merging-test', test_args=[]) > self.script.logger.setLevel(ERROR) > > > @@ -617,47 +617,6 @@ > > self.assertEqual(map, expected) > > - def test_FindNoUsedMessage(self): > - # Test _findUsedMessage against a case where there are no used > - # messages. > - message = self._makeTranslationMessage( > - pofile=self.trunk_pofile, potmsgset=self.trunk_potmsgset, > - text='godot', diverged=False) > - message.is_current = False > - key = self.script._getPOTMsgSetTranslationMessageKey(message) > - map = {key: {self.trunk_template: [message]}} > - > - self.assertEqual(self.script._findUsedMessages(map), ({}, {})) > - > - def test_FindUsedMessages(self): > - # _findUsedMessages maps diverged messages based on template, > - # language, and variant. The difference for shared messages is > - # that their template is None. > - imported_message = self._makeTranslationMessage( > - pofile=self.trunk_pofile, potmsgset=self.trunk_potmsgset, > - text='pog', diverged=True) > - imported_message.is_current = False > - imported_message.is_imported = True > - current_message = self._makeTranslationMessage( > - pofile=self.trunk_pofile, potmsgset=self.trunk_potmsgset, > - text='klexdigal', diverged=False) > - > - current_key = self.script._getPOTMsgSetTranslationMessageKey( > - current_message) > - imported_key = self.script._getPOTMsgSetTranslationMessageKey( > - imported_message) > - > - map = { > - current_key: {self.trunk_template: [current_message]}, > - imported_key: { self.trunk_template: [imported_message]}, > - } > - > - expected = ( > - {(None, self.dutch, None): current_message}, > - {(self.trunk_template, self.dutch, None): imported_message}) > - > - self.assertEqual(self.script._findUsedMessages(map), expected) > - > def test_ScrubPOTMsgSetTranslationsWithoutDuplication(self): > # _scrubPOTMsgSetTranslations eliminates duplicated > # TranslationMessages. If it doesn't find any, nothing happens. > @@ -776,95 +735,6 @@ > self.assertEqual(imported_clash, None) > self.assertEqual(twin, trunk_message) > > - def test_FindCurrentClashFromDicts(self): > - # Like test_FindCurrentClash, but using cached dicts. > - trunk_message, stable_message = self._makeTranslationMessages( > - 'ex', 'why', trunk_diverged=False, stable_diverged=False) > - > - trunk_key = self.script._getPOTMsgSetTranslationMessageKey( > - trunk_message) > - stable_key = self.script._getPOTMsgSetTranslationMessageKey( > - stable_message) > - > - existing = self.script._mapExistingMessages(self.trunk_potmsgset) > - current, imported = self.script._findUsedMessages(existing) > - current_clash, imported_clash, twin = ( > - self.script._findClashesFromDicts( > - existing, current, imported, stable_message)) > - > - self.assertEqual(current_clash, trunk_message) > - self.assertEqual(imported_clash, None) > - self.assertEqual(twin, None) > - > - > - def test_FindImportedClashFromDicts(self): > - # Like test_FindImportedClash, but using cached dicts. > - trunk_message, stable_message = self._makeTranslationMessages( > - 'ex', 'why', trunk_diverged=False, stable_diverged=False) > - > - for message in (trunk_message, stable_message): > - message.is_current = False > - message.is_imported = True > - > - existing = self.script._mapExistingMessages(self.trunk_potmsgset) > - current, imported = self.script._findUsedMessages(existing) > - current_clash, imported_clash, twin = ( > - self.script._findClashesFromDicts( > - existing, current, imported, stable_message)) > - > - self.assertEqual(current_clash, None) > - self.assertEqual(imported_clash, trunk_message) > - self.assertEqual(twin, None) > - > - def test_FindTwinFromDicts(self): > - # Like test_FindTwin, but using cached dicts. > - trunk_message, stable_message = self._makeTranslationMessages( > - 'klob', 'klob', trunk_diverged=False, stable_diverged=False) > - trunk_message.is_current = False > - > - existing = self.script._mapExistingMessages(self.trunk_potmsgset) > - current, imported = self.script._findUsedMessages(existing) > - current_clash, imported_clash, twin = ( > - self.script._findClashesFromDicts( > - existing, current, imported, stable_message)) > - > - self.assertEqual(current_clash, None) > - self.assertEqual(imported_clash, None) > - self.assertEqual(twin, trunk_message) > - > - def test_FindClashesWithTwinFromDicts(self): > - # Like test_FindClashesWithTwin, but using cached dicts. > - trunk_message, stable_message = self._makeTranslationMessages( > - 'sniw', 'sniw', trunk_diverged=False, stable_diverged=False) > - > - existing = self.script._mapExistingMessages(self.trunk_potmsgset) > - current, imported = self.script._findUsedMessages(existing) > - current_clash, imported_clash, twin = ( > - self.script._findClashesFromDicts( > - existing, current, imported, stable_message)) > - > - self.assertEqual(current_clash, None) > - self.assertEqual(imported_clash, None) > - self.assertEqual(twin, trunk_message) > - > - def test_FindClashesWithNonTwinFromDicts(self): > - # Like test_FindClashesWithNonTwin, but using cached dicts. > - trunk_message, stable_message = self._makeTranslationMessages( > - 'sniw', 'sniw', trunk_diverged=False, stable_diverged=False) > - trunk_message.is_current = False > - current_message = self._makeTranslationMessage( > - self.trunk_pofile, self.trunk_potmsgset, 'gah', False) > - > - existing = self.script._mapExistingMessages(self.trunk_potmsgset) > - current, imported = self.script._findUsedMessages(existing) > - current_clash, imported_clash, twin = ( > - self.script._findClashesFromDicts( > - existing, current, imported, stable_message)) > - > - self.assertEqual(current_clash, current_message) > - self.assertEqual(imported_clash, None) > - self.assertEqual(twin, trunk_message) > - > > def test_suite(): > return TestLoader().loadTestsFromName(__name__)