Hi Jeroen, This looks like a nice refactoring. I only have very minor suggestions. merge-conditional -Edwin >=== modified file 'lib/lp/translations/doc/translationimportqueue.txt' >--- lib/lp/translations/doc/translationimportqueue.txt 2009-09-25 15:40:58 +0000 >+++ lib/lp/translations/doc/translationimportqueue.txt 2009-09-30 19:25:02 +0000 >@@ -4,6 +4,14 @@ > into Rosetta. > > >>> from zope.security.proxy import removeSecurityProxy >+ >>> from canonical.launchpad.interfaces.lpstorm import IStore >+ >>> from lp.translations.model.translationimportqueue import ( >+ ... TranslationImportQueueEntry) >+ >+ >>> def clear_queue(queue): >+ ... """Remove all entries off the import queue.""" >+ ... store = IStore(TranslationImportQueueEntry) >+ ... store.find(TranslationImportQueueEntry).remove() > > > == getGuessedPOFile == >@@ -1296,6 +1304,15 @@ > If such bad requests do end up on the import queue, the import queue code will > raise errors about them. > >+ >>> def print_import_failures(import_script): >+ ... """List failures recorded in import script instance.""" Should that be singular with an article: "in an/the import script instance" or plural without an article: "in import script instances" >+ ... for reason, entries in script.failures.iteritems(): >+ ... print reason >+ ... for entry in entries: >+ ... print "-> " + entry >+ >+ >>> clear_queue(translationimportqueue) >+ > >>> entry = translationimportqueue.addOrUpdateEntry( > ... u'po/sr.po', 'foo', True, rosetta_experts, > ... productseries=evolution_productseries) >@@ -1306,12 +1323,29 @@ > >>> entry.setStatus(RosettaImportStatus.APPROVED) > > >>> import logging >- >>> from lp.translations.scripts.po_import import ImportProcess >- >>> ImportProcess(transaction, logging).run() >- Traceback (most recent call last): >- ... >- AssertionError: Broken translation import queue entry 4 (for evolution): >- ... >+ >>> from canonical.launchpad.scripts import FakeLogger >+ >>> from lp.translations.scripts.po_import import TranslationsImport >+ >+ >>> script = TranslationsImport('poimport', test_args=[]) >+ >>> script.logger = FakeLogger() >+ >>> script.main() >+ DEBUG Starting... >+ ERROR Entry is approved but has no place to import to. >+ ... >+ DEBUG Finished the import process. >+ >+ >>> print_import_failures(script) >+ Entry is approved but has no place to import to. >+ -> 'po/sr.po' (id ...) in Evolution trunk series >+ >+The entry is marked as Failed. >+ >+ >>> print entry.status.name >+ FAILED >+ >+This happens for distribution packages as well as products. >+ >+ >>> clear_queue(translationimportqueue) > > >>> entry = translationimportqueue.addOrUpdateEntry( > ... u'po/th.po', 'bar', False, rosetta_experts, >@@ -1323,25 +1357,21 @@ > > >>> entry.setStatus(RosettaImportStatus.APPROVED) > >- >>> ImportProcess(transaction, logging).run() >- Traceback (most recent call last): >- ... >- AssertionError: Broken translation import queue entry 4 (for evolution): >- ... >+ >>> script = TranslationsImport('poimport', test_args=[]) >+ >>> script.logger.setLevel(logging.FATAL) >+ >>> script.main() >+ >>> print_import_failures(script) >+ Entry is approved but has no place to import to. >+ -> 'po/th.po' (id ...) in ubuntu Hoary package evolution >+ >+ >>> print entry.status.name >+ FAILED >+ >+ >>> clear_queue(translationimportqueue) > > > == getRequestTargets output ordering == > >-TranslationImportQueue.getRequestTargets first lists distro series >-(ordered by distro name and series name), followed by products (ordered >-by name). >- >- >>> print_list(get_target_names(RosettaImportStatus.APPROVED)) >- ubuntu/hoary Hoary >- evolution Evolution >- firefox Mozilla Firefox >- >- > The queue is populated with a wild mix of requests: for packages in > different release series of Ubuntu, for packages in different distros, > for product series, with different statuses; some were imported from >@@ -1388,29 +1418,24 @@ > >>> entry.setStatus(RosettaImportStatus.NEEDS_REVIEW) > >>> flush_database_updates() > >- >-The targets (distro series or products) that these requests are for are >-always listed in the same, predictable order. >+TranslationImportQueue.getRequestTargets first lists distro series >+(ordered by distro name and series name), followed by products (ordered >+by name). > > >>> print_list(get_target_names()) > debian/sarge Sarge > debian/woody Woody > kubuntu/krunch Krunch > ubuntu/grumpy Grumpy >- ubuntu/hoary Hoary > alsa-utils alsa-utils > evolution Evolution >- firefox Mozilla Firefox >- netapplet NetApplet > > >>> for status in RosettaImportStatus.items: > ... print "%s:" % status.name > ... print_list(get_target_names(status)) > APPROVED: > kubuntu/krunch Krunch >- ubuntu/hoary Hoary > evolution Evolution >- firefox Mozilla Firefox > IMPORTED: > evolution Evolution > DELETED: >@@ -1419,13 +1444,15 @@ > NEEDS_REVIEW: > debian/sarge Sarge > debian/woody Woody >- ubuntu/hoary Hoary >- evolution Evolution >- firefox Mozilla Firefox >- netapplet NetApplet > BLOCKED: > ubuntu/grumpy Grumpy > >+You can also select by status. >+ >+ >>> print_list(get_target_names(RosettaImportStatus.APPROVED)) >+ kubuntu/krunch Krunch >+ evolution Evolution >+ > > == cleanUpQueue == > > >=== modified file 'lib/lp/translations/scripts/po_import.py' >--- lib/lp/translations/scripts/po_import.py 2009-09-18 07:39:51 +0000 >+++ lib/lp/translations/scripts/po_import.py 2009-09-30 19:25:02 +0000 >@@ -10,44 +10,135 @@ > 'ImportProcess', > ] > >-import time >+from datetime import datetime, timedelta >+import sys > >+from pytz import UTC > from zope.component import getUtility > > from canonical.config import config >-from canonical.database.sqlbase import flush_database_updates > from canonical.launchpad import helpers > from canonical.launchpad.interfaces.launchpad import ILaunchpadCelebrities > from lp.translations.interfaces.translationimportqueue import ( > ITranslationImportQueue, RosettaImportStatus) > from canonical.launchpad.mail import simple_sendmail > from canonical.launchpad.mailnotification import MailWrapper >- >- >-class ImportProcess: >+from canonical.launchpad.webapp import errorlog >+from lp.services.scripts.base import LaunchpadCronScript >+ >+ >+class TranslationsImport(LaunchpadCronScript): > """Import .po and .pot files attached to Rosetta.""" >- >- def __init__(self, ztm, logger, max_seconds=3600): >- """Initialize the ImportProcess object. >- >- Arguments: >- >- :param ztm: transaction manager to commit our individual imports. >- >- :param logger: logging object to log informational messages and errors >- to. >- >- :param max_seconds: "alarm clock": after this many seconds, the job >- should finish up even if there is more work for it to do. This is a >- mere guideline; actual processing time may be longer. >- """ >- self.ztm = ztm >- self.logger = logger >- self.deadline = time.time() + max_seconds >- >- def run(self): >- """Execute the import of entries from the queue.""" >- # Get the queue. >+ # Time goal for one run. It is not exact. The script can run for >+ # longer than this, but will know to stop taking on new work. >+ # Since the script is run every 9 or 10 minutes, we set the "alarm" I would avoid calling this an alarm, since it makes me wonder if you are using signal.alarm(), even though you explain how it really works in the preceding sentence. >+ # at 8 minutes. That leaves a bit of time to complete the last >+ # ongoing batch of imports. >+ time_to_run = timedelta(minutes=8) >+ >+ # Failures to be reported in bulk at closing, so we don't accumulate >+ # thousands of oopses for repetitive errors. >+ failures = None What is the purpose of initializing it here in addition to in the constructor? Is it used to create documentation that knows about instance variables? >+ def __init__(self, *args, **kwargs): >+ super(TranslationsImport, self).__init__(*args, **kwargs) >+ self.failures = {} >+ >+ def _describeEntry(self, entry): >+ """Identify `entry` in a human-readable way.""" >+ if entry.import_into: >+ return "%s (id %d)" % (entry.import_into.title, entry.id) >+ >+ if entry.sourcepackagename: >+ return "'%s' (id %d) in %s %s package %s" % ( >+ entry.path, entry.id, >+ entry.distroseries.distribution.name, >+ entry.distroseries.displayname, >+ entry.sourcepackagename.name) >+ else: >+ return "'%s' (id %d) in %s" % ( >+ entry.path, entry.id, entry.productseries.title) >+ >+ def _reportOops(self, reason, entries, exc_info=None): >+ """Register an oops.""" >+ if exc_info is None: >+ exc_info = sys.exc_info() >+ description = [ >+ ('Reason', reason), >+ ('Queue entries', entries), >+ ] >+ errorlog.globalErrorUtility.raising( >+ exc_info, errorlog.ScriptRequest(description)) >+ >+ def _registerFailure(self, entry, reason, traceback=False, abort=False): >+ """Note that a queue entry is unusable in some way.""" >+ entry.setStatus(RosettaImportStatus.FAILED) >+ entry.setErrorOutput(reason) >+ >+ if abort: >+ traceback = True >+ >+ description = self._describeEntry(entry) >+ message = "%s -- %s" % (reason, description) >+ self.logger.error(message, exc_info=traceback) >+ >+ if abort: >+ # Serious enough to stop the script. Register as an >+ # individual oops. >+ self._reportOops(self, reason, [description]) >+ else: >+ # Register problem for bulk reporting later. >+ if not self.failures.get(reason): >+ self.failures[reason] = [] >+ self.failures[reason].append(description) >+ >+ def _checkEntry(self, entry): >+ """Sanity-check `entry` before importing.""" >+ if entry.import_into is None: >+ self._registerFailure( >+ entry, "Entry is approved but has no place to import to.") >+ return False >+ >+ template = entry.potemplate >+ if template: >+ if template.distroseries != entry.distroseries: >+ self._registerFailure( >+ entry, "Entry was approved for the wrong distroseries.") >+ return False >+ if template.productseries != entry.productseries: >+ self._registerFailure( >+ entry, "Entry was approved for the wrong productseries.") >+ return False >+ >+ return True >+ >+ def _importEntry(self, entry): >+ """Perform the import of one entry, and notify the uploader.""" >+ self.logger.info('Importing: %s' % entry.import_into.title) >+ target = entry.import_into >+ (mail_subject, mail_body) = target.importFromQueue(entry, self.logger) >+ >+ if mail_subject is not None: >+ # A `mail_subject` of None indicates that there >+ # is no notification worth sending out. >+ from_email = config.rosetta.admin_email >+ katie = getUtility(ILaunchpadCelebrities).katie >+ if entry.importer == katie: >+ # Email import state to Debian imports email. >+ to_email = config.rosetta.debian_import_email >+ else: >+ to_email = helpers.get_contact_email_addresses(entry.importer) >+ >+ if to_email: >+ text = MailWrapper().format(mail_body) >+ simple_sendmail(from_email, to_email, mail_subject, text) >+ >+ def main(self): >+ """Import entries from the queue.""" >+ self.logger.debug("Starting the import process.") >+ >+ errorlog.globalErrorUtility.configure('poimport') >+ self.deadline = datetime.now(UTC) + self.time_to_run > translation_import_queue = getUtility(ITranslationImportQueue) > > # Get the list of each product or distroseries with pending imports. >@@ -60,138 +151,48 @@ > self.logger.info("No requests pending.") > return > >- # XXX: JeroenVermeulen 2007-06-20: How on Earth do we test that the >- # deadline code works? It's only a small thing, and of course we'll >- # notice that it works when we stop getting errors about this script >- # not finishing. Meanwhile, SteveA has suggested a more general >- # solution. >- while importqueues and time.time() < self.deadline: >- # For fairness, service all queues at least once; don't check for >- # deadline. If we stopped halfway through the list of queues, we >- # would accidentally favour queues that happened to come out at >- # the front of the list. >+ have_work = True >+ >+ while have_work and datetime.now(UTC) < self.deadline: >+ have_work = False >+ >+ # For fairness, service all queues at least once; don't >+ # check for deadlines here or we'd favour some >+ # products/packages over others. > for queue in importqueues: >- # Make sure our previous state changes hit the database. >- # Otherwise, getFirstEntryToImport() might pick up an entry >- # we've already processed but haven't flushed yet. >- # XXX: JeroenVermeulen 2007-11-29 bug=3989: should become >- # unnecessary once Zopeless commit() improves. >- flush_database_updates() >- >- entry_to_import = queue.getFirstEntryToImport() >- if entry_to_import is None: >- continue >- >- if entry_to_import.import_into is None: >- if entry_to_import.sourcepackagename is not None: >- package = entry_to_import.sourcepackagename.name >- elif entry_to_import.productseries is not None: >- package = ( >- entry_to_import.productseries.product.displayname) >- else: >- raise AssertionError( >- "Import queue entry %d has neither a " >- "source package name nor a product series." >- % entry_to_import.id) >- raise AssertionError( >- "Broken translation import queue entry %d (for %s): " >- "it's Approved but lacks the place where it should " >- "be imported! A DBA will need to fix this by hand." >- % (entry_to_import.id, package)) >- >- # Do the import. >- title = '[Unknown Title]' >- try: >- title = entry_to_import.import_into.title >- self.logger.info('Importing: %s' % title) >- >- (mail_subject, mail_body) = ( >- entry_to_import.import_into.importFromQueue( >- entry_to_import, self.logger)) >- >- if mail_subject is not None: >- # A `mail_subject` of None indicates that there >- # is no notification worth sending out. >- from_email = config.rosetta.admin_email >- katie = getUtility(ILaunchpadCelebrities).katie >- if entry_to_import.importer == katie: >- # Email import state to Debian imports email. >- to_email = config.rosetta.debian_import_email >- else: >- to_email = helpers.get_contact_email_addresses( >- entry_to_import.importer) >- >- if to_email: >- text = MailWrapper().format(mail_body) >- >- # XXX: JeroenVermeulen 2007-11-29 bug=29744: email >- # isn't transactional in zopeless mode. That >- # means that our current transaction can fail >- # after we already sent out a success >- # notification. To prevent that, we commit the >- # import (attempt) before sending out the email. >- # That way the worst that can happen is that an >- # email goes missing. >- # Once bug 29744 is fixed, this commit must die so >- # the email and the import will be in a single >- # atomic operation. >- self.ztm.commit() >- self.ztm.begin() >- >- simple_sendmail( >- from_email, to_email, mail_subject, text) >- >- except KeyboardInterrupt: >- self.ztm.abort() >- raise >- except AssertionError: >- raise >- except: >- # If we have any exception, log it, abort the transaction >- # and set the status to FAILED. >- self.logger.error('Got an unexpected exception while' >- ' importing %s' % title, exc_info=1) >- # We are going to abort the transaction, need to save the >- # id of this entry to update its status. >- failed_entry_id = entry_to_import.id >- self.ztm.abort() >- # Get the needed objects to set the failed entry status as >- # FAILED. >- self.ztm.begin() >- translation_import_queue = getUtility( >- ITranslationImportQueue) >- entry_to_import = translation_import_queue[ >- failed_entry_id] >- entry_to_import.setStatus(RosettaImportStatus.FAILED) >- self.ztm.commit() >- self.ztm.begin() >- # Go to process next entry. >- continue >- >- # As soon as the import is done, we commit the transaction >- # so it's not lost. >- try: >- self.ztm.commit() >- self.ztm.begin() >- except KeyboardInterrupt: >- self.ztm.abort() >- raise >- except: >- # If we have any exception, we log it and abort the >- # transaction. >- self.logger.error( >- 'We got an unexpected exception while committing the ' >- 'transaction', >- exc_info=1) >- self.ztm.abort() >- self.ztm.begin() >- >- # Refresh the list of objects with pending imports. >- importqueues = ( >- translation_import_queue.getRequestTargets( >- RosettaImportStatus.APPROVED)) >- >- if not importqueues: >+ entry = queue.getFirstEntryToImport() >+ if entry is None: >+ continue >+ >+ have_work = True >+ >+ try: >+ if self._checkEntry(entry): >+ self._importEntry(entry) >+ if self.txn: >+ self.txn.commit() >+ except KeyboardInterrupt: >+ raise >+ except (AssertionError, SystemError), e: >+ self._registerFailure(entry, e, abort=True) >+ raise >+ except Exception, e: >+ if self.txn: >+ self.txn.abort() >+ self._registerFailure(entry, e, traceback=True) >+ if self.txn: >+ self.txn.commit() >+ >+ if have_work: >+ self.logger.info("Used up available time.") >+ else: > self.logger.info("Import requests completed.") >- else: >- self.logger.info("Used up available time.") >+ >+ self._reportFailures() >+ Trailing white space. >+ self.logger.debug("Finished the import process.") >+ >+ def _reportFailures(self): >+ """Bulk-report deferred failures as oopses.""" >+ for reason, entries in self.failures.iteritems(): >+ self._reportOops(reason, entries) >