Merge lp:~benji/launchpad/bug-742662 into lp:launchpad

Proposed by Benji York
Status: Rejected
Rejected by: Benji York
Proposed branch: lp:~benji/launchpad/bug-742662
Merge into: lp:launchpad
Diff against target: 313 lines (+146/-14)
5 files modified
lib/lp/translations/utilities/sanitize.py (+23/-7)
lib/lp/translations/utilities/tests/test_file_importer.py (+22/-4)
lib/lp/translations/utilities/tests/test_sanitize.py (+21/-0)
lib/lp/translations/utilities/tests/test_translation_importer.py (+63/-1)
lib/lp/translations/utilities/translation_import.py (+17/-2)
To merge this branch: bzr merge lp:~benji/launchpad/bug-742662
Reviewer Review Type Date Requested Status
Graham Binns (community) code Approve
Review via email: mp+74254@code.launchpad.net

Commit message

[r=gmb][bug=742662] report mixed newline usage when importing translations

Description of the change

Bug 742662 describes a scenario in which an imported translation
contains mixed newlines (\r as well as \n) and outlines three possible
fixes. This branch implements the option of disallowing such imports.
The software in place intended to do so, but the code to generate the
exception denoting a mixed newline condition was not being called. This
branch adds that code and tests for it.

The Sanitize class had a method that both normalized newlines as well as
reporting mixed newline violations. It didn't make sense to separate
out those two bits of functionality so I added a facade
(verifyNewlineConsistency) that only claims to validate newlines
(discarding the normalized result) and used that. I then added an
intermediary function (verify_message_newline_consistency) that applies
the newline verification to every string of natural-language text in a
translation message and then called that function when importing a
message (in importFile in
lib/lp/translations/utilities/translation_import.py).

Tests: the new tests that were added (and several related tests) can be
run with this command:

    bin/test -c -t test_sanitize -t test_file_importer \
        -t test_translation_importer

Lint: the "make lint" report is clean (after fixing some pre-existing
lint).

QA: I honestly don't know how to QA this. I'll have to ask Danilo or
one of the other translations guys. It might be that I can export the
template mentioned in the bug report
(https://translations.launchpad.net/ubuntu/natty/+source/alsa-utils/+pots/alsa-utils/bs/+translate)
and reimport it. It should generate an error message about mixed
newlines on reimport.

To post a comment you must log in.
Revision history for this message
Graham Binns (gmb) :
review: Approve (code)

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'lib/lp/translations/utilities/sanitize.py'
--- lib/lp/translations/utilities/sanitize.py 2010-09-29 15:46:26 +0000
+++ lib/lp/translations/utilities/sanitize.py 2011-09-06 15:38:41 +0000
@@ -4,6 +4,7 @@
4__metaclass__ = type4__metaclass__ = type
5__all__ = [5__all__ = [
6 'MixedNewlineMarkersError',6 'MixedNewlineMarkersError',
7 'Sanitizer',
7 'sanitize_translations_from_webui',8 'sanitize_translations_from_webui',
8 'sanitize_translations_from_import',9 'sanitize_translations_from_import',
9 ]10 ]
@@ -47,29 +48,44 @@
47 self.newline_style = self._getNewlineStyle(48 self.newline_style = self._getNewlineStyle(
48 english_singular, 'Original')49 english_singular, 'Original')
4950
50 def _getNewlineStyle(self, text, text_name):51 @classmethod
52 def _getNewlineStyle(cls, text, text_name=None):
51 """Find out which newline style is used in text."""53 """Find out which newline style is used in text."""
52 error_message = (
53 "%s text (%r) mixes different newline markers." % (
54 text_name, text))
55 style = None54 style = None
56 # To avoid confusing the single-character newline styles for mac and55 # To avoid confusing the single-character newline styles for mac and
57 # unix with the two-character windows one, remove the windows-style56 # unix with the two-character windows one, remove the windows-style
58 # newlines from the text and use that text to search for the other57 # newlines from the text and use that text to search for the other
59 # two.58 # two.
60 stripped_text = text.replace(self.windows_style, u'')59 stripped_text = text.replace(cls.windows_style, u'')
61 if text != stripped_text:60 if text != stripped_text:
62 # Text contains windows style new lines.61 # Text contains windows style new lines.
63 style = self.windows_style62 style = cls.windows_style
6463
65 for one_char_style in (self.mac_style, self.unix_style):64 for one_char_style in (cls.mac_style, cls.unix_style):
66 if one_char_style in stripped_text:65 if one_char_style in stripped_text:
67 if style is not None:66 if style is not None:
67 # If the text has a descriptive name (like "Original"),
68 # then format it for inclusion in the error message.
69 if text_name is not None:
70 text_name += ' '
71 else:
72 text_name = ''
73
74 error_message = (
75 "%stext (%r) mixes different newline markers." % (
76 text_name, text))
77
68 raise MixedNewlineMarkersError(error_message)78 raise MixedNewlineMarkersError(error_message)
69 style = one_char_style79 style = one_char_style
7080
71 return style81 return style
7282
83 @classmethod
84 def verifyNewlineConsistency(cls, text, text_name=None):
85 """Raise a MixedNewlineMarkersError if the newlines are inconsistent.
86 """
87 cls._getNewlineStyle(text, text_name)
88
73 def sanitize(self, translation_text):89 def sanitize(self, translation_text):
74 """Return 'translation_text' or None after doing some sanitization.90 """Return 'translation_text' or None after doing some sanitization.
7591
7692
=== modified file 'lib/lp/translations/utilities/tests/test_file_importer.py'
--- lib/lp/translations/utilities/tests/test_file_importer.py 2011-06-09 10:50:25 +0000
+++ lib/lp/translations/utilities/tests/test_file_importer.py 2011-09-06 15:38:41 +0000
@@ -31,6 +31,7 @@
31 ITranslationImportQueue,31 ITranslationImportQueue,
32 )32 )
33from lp.translations.utilities.gettext_po_importer import GettextPOImporter33from lp.translations.utilities.gettext_po_importer import GettextPOImporter
34from lp.translations.utilities.sanitize import MixedNewlineMarkersError
34from lp.translations.utilities.translation_common_format import (35from lp.translations.utilities.translation_common_format import (
35 TranslationMessageData,36 TranslationMessageData,
36 )37 )
@@ -246,7 +247,7 @@
246 message.addTranslation(0, u'')247 message.addTranslation(0, u'')
247248
248 potmsgset = self.factory.makePOTMsgSet(249 potmsgset = self.factory.makePOTMsgSet(
249 potemplate = importer.potemplate, sequence=50)250 potemplate=importer.potemplate, sequence=50)
250 translation = importer.storeTranslationsInDatabase(251 translation = importer.storeTranslationsInDatabase(
251 message, potmsgset)252 message, potmsgset)
252 # No TranslationMessage is created.253 # No TranslationMessage is created.
@@ -353,6 +354,24 @@
353 po_importer.potemplate.displayname),354 po_importer.potemplate.displayname),
354 'Did not create the correct comment for %s' % test_email)355 'Did not create the correct comment for %s' % test_email)
355356
357 def test_FileImporter_rejects_mixed_newline_styles(self):
358 # Files with mixed newlines (Mac, Unix, Windows) will generate an
359 # error. See the Sanitizer tests for comprehensive tests (all
360 # combinations, etc.).
361 pofile_contents = dedent(r"""
362 msgid ""
363 msgstr ""
364 "POT-Creation-Date: 2001-04-07 16:30+0500\n"
365 "Content-Type: text/plain; charset=CHARSET\n"
366 "Language: English\n"
367
368 msgid "bar"
369 msgstr "mixed\rnewlines\n"
370 """)
371
372 pot_importer = self._createPOTFileImporter(pofile_contents, True)
373 self.assertRaises(MixedNewlineMarkersError, pot_importer.importFile)
374
356 def test_getPersonByEmail_personless_account(self):375 def test_getPersonByEmail_personless_account(self):
357 # An Account without a Person attached is a difficult case for376 # An Account without a Person attached is a difficult case for
358 # _getPersonByEmail: it has to create the Person but re-use an377 # _getPersonByEmail: it has to create the Person but re-use an
@@ -566,7 +585,7 @@
566 def test_not_raises_OutdatedTranslationError_on_upstream_uploads(self):585 def test_not_raises_OutdatedTranslationError_on_upstream_uploads(self):
567 queue_entry = self._make_queue_entry(True)586 queue_entry = self._make_queue_entry(True)
568 try:587 try:
569 importer = POFileImporter(queue_entry, GettextPOImporter(), None)588 POFileImporter(queue_entry, GettextPOImporter(), None)
570 except OutdatedTranslationError:589 except OutdatedTranslationError:
571 self.fail("OutdatedTranslationError raised.")590 self.fail("OutdatedTranslationError raised.")
572591
@@ -574,7 +593,7 @@
574 queue_entry = self._make_queue_entry(True)593 queue_entry = self._make_queue_entry(True)
575 pofile = queue_entry.pofile594 pofile = queue_entry.pofile
576 old_raw_header = pofile.header595 old_raw_header = pofile.header
577 importer = POFileImporter(queue_entry, GettextPOImporter(), None)596 POFileImporter(queue_entry, GettextPOImporter(), None)
578 self.assertEqual(old_raw_header, pofile.header)597 self.assertEqual(old_raw_header, pofile.header)
579598
580599
@@ -762,4 +781,3 @@
762 importer = POFileImporter(781 importer = POFileImporter(
763 entry, importers[TranslationFileFormat.PO], None)782 entry, importers[TranslationFileFormat.PO], None)
764 self.assertTrue(importer.is_upstream_import_on_sourcepackage)783 self.assertTrue(importer.is_upstream_import_on_sourcepackage)
765
766784
=== modified file 'lib/lp/translations/utilities/tests/test_sanitize.py'
--- lib/lp/translations/utilities/tests/test_sanitize.py 2011-08-12 11:37:08 +0000
+++ lib/lp/translations/utilities/tests/test_sanitize.py 2011-09-06 15:38:41 +0000
@@ -158,6 +158,27 @@
158 MixedNewlineMarkersError,158 MixedNewlineMarkersError,
159 sanitizer.normalizeNewlines, translation_text)159 sanitizer.normalizeNewlines, translation_text)
160160
161 def test_verifyNewlineConsistency_with_consistent_newlines(self):
162 # Consistent newlines in the text will not raise an exception.
163 translation_template = u"Text with %s consistent %s newlines."
164 for translation_newline in self.newline_styles:
165 translation_text = translation_template % (
166 translation_newline, translation_newline)
167 Sanitizer.verifyNewlineConsistency(translation_text)
168
169 def test_verifyNewlineConsistency_with_mixed_newlines(self):
170 # Consistent newlines in the text will not raise an exception.
171 translation_template = u"Text with %s mixed %s newlines."
172 for translation_newline_1 in self.newline_styles:
173 other_newlines = self.newline_styles[:]
174 other_newlines.remove(translation_newline_1)
175 for translation_newline_2 in other_newlines:
176 translation_text = translation_template % (
177 translation_newline_1, translation_newline_2)
178 self.assertRaises(
179 MixedNewlineMarkersError,
180 Sanitizer.verifyNewlineConsistency, translation_text)
181
161 def test_sanitize(self):182 def test_sanitize(self):
162 # Calling the Sanitizer object will apply all sanitization procedures.183 # Calling the Sanitizer object will apply all sanitization procedures.
163 sanitizer = Sanitizer(u"Text with\nnewline.")184 sanitizer = Sanitizer(u"Text with\nnewline.")
164185
=== modified file 'lib/lp/translations/utilities/tests/test_translation_importer.py'
--- lib/lp/translations/utilities/tests/test_translation_importer.py 2011-02-14 17:14:17 +0000
+++ lib/lp/translations/utilities/tests/test_translation_importer.py 2011-09-06 15:38:41 +0000
@@ -9,7 +9,10 @@
99
10from canonical.testing.layers import LaunchpadZopelessLayer10from canonical.testing.layers import LaunchpadZopelessLayer
11from lp.services.log.logger import DevNullLogger11from lp.services.log.logger import DevNullLogger
12from lp.testing import TestCaseWithFactory12from lp.testing import (
13 TestCase,
14 TestCaseWithFactory,
15 )
13from lp.testing.matchers import Provides16from lp.testing.matchers import Provides
14from lp.translations.enums import RosettaImportStatus17from lp.translations.enums import RosettaImportStatus
15from lp.translations.interfaces.translationfileformat import (18from lp.translations.interfaces.translationfileformat import (
@@ -21,12 +24,15 @@
21from lp.translations.utilities.translation_common_format import (24from lp.translations.utilities.translation_common_format import (
22 TranslationMessageData,25 TranslationMessageData,
23 )26 )
27from lp.translations.utilities.sanitize import MixedNewlineMarkersError
24from lp.translations.utilities.translation_import import (28from lp.translations.utilities.translation_import import (
25 importers,29 importers,
26 is_identical_translation,30 is_identical_translation,
31 message_text_attributes,
27 POFileImporter,32 POFileImporter,
28 POTFileImporter,33 POTFileImporter,
29 TranslationImporter,34 TranslationImporter,
35 verify_message_newline_consistency,
30 )36 )
3137
3238
@@ -301,3 +307,59 @@
301 importer = POFileImporter(queue_entry, FakeParser(), DevNullLogger())307 importer = POFileImporter(queue_entry, FakeParser(), DevNullLogger())
302 importer.importMessage(message)308 importer.importMessage(message)
303 self.assertEqual(old_file_references, potmsgset.filereferences)309 self.assertEqual(old_file_references, potmsgset.filereferences)
310
311
312class FakeMessage:
313 msgid_singular = None
314 msgid_plural = None
315 singular_text = None
316 plural_text = None
317 translations = ()
318
319
320class NewlineVerificationTests(TestCase):
321 def test_attribute_ok(self):
322 # If a message's attributes are None or strings without mixed
323 # newlines, then no exception is raise.
324 for attr_name in message_text_attributes:
325 message = FakeMessage()
326 message.msgid_singular = 'no mixed newlines here'
327 # The next call doesn't raise an exception.
328 verify_message_newline_consistency(message)
329
330 def test_attribute_not_ok(self):
331 # If a message's attributes contain mixed newlines, an exception is
332 # raised.
333 for attr_name in message_text_attributes:
334 message = FakeMessage()
335 setattr(message, attr_name, 'mixed\rnewlines\n')
336 e = self.assertRaises(
337 MixedNewlineMarkersError,
338 verify_message_newline_consistency, message)
339 # The error message mentions the attribute name.
340 self.assertEqual(
341 e.args[0],
342 "%s text ('mixed\\rnewlines\\n') " % attr_name +
343 "mixes different newline markers.")
344
345 def test_translations_ok(self):
346 # If a message's translations are None or strings without mixed
347 # newlines, then no exception is raise.
348 message = FakeMessage()
349 message.translations = ['no mixed newlines here']
350 # The next call doesn't raise an exception.
351 verify_message_newline_consistency(message)
352
353 def test_translations_not_ok(self):
354 # If a message's translations contain mixed newlines, an exception is
355 # raised.
356 message = FakeMessage()
357 message.translations = ['mixed\rnewlines\n']
358 e = self.assertRaises(
359 MixedNewlineMarkersError,
360 verify_message_newline_consistency, message)
361 # The error message mentions the translation index ("0" in this case).
362 self.assertEqual(
363 e.args[0],
364 "translation 0 text ('mixed\\rnewlines\\n') "
365 "mixes different newline markers.")
304366
=== modified file 'lib/lp/translations/utilities/translation_import.py'
--- lib/lp/translations/utilities/translation_import.py 2011-03-24 11:21:19 +0000
+++ lib/lp/translations/utilities/translation_import.py 2011-09-06 15:38:41 +0000
@@ -59,6 +59,7 @@
59from lp.translations.utilities.mozilla_xpi_importer import MozillaXpiImporter59from lp.translations.utilities.mozilla_xpi_importer import MozillaXpiImporter
60from lp.translations.utilities.sanitize import (60from lp.translations.utilities.sanitize import (
61 sanitize_translations_from_import,61 sanitize_translations_from_import,
62 Sanitizer,
62 )63 )
63from lp.translations.utilities.translation_common_format import (64from lp.translations.utilities.translation_common_format import (
64 TranslationMessageData,65 TranslationMessageData,
@@ -75,6 +76,19 @@
75 TranslationFileFormat.XPI: MozillaXpiImporter(),76 TranslationFileFormat.XPI: MozillaXpiImporter(),
76 }77 }
7778
79message_text_attributes = [
80 'msgid_singular', 'msgid_plural', 'singular_text', 'plural_text']
81
82
83def verify_message_newline_consistency(message):
84 for attr_name in message_text_attributes:
85 value = getattr(message, attr_name)
86 if value is not None:
87 Sanitizer.verifyNewlineConsistency(value, attr_name)
88 for index, translation in enumerate(message.translations):
89 Sanitizer.verifyNewlineConsistency(
90 translation, 'translation %d' % index)
91
7892
79def is_identical_translation(existing_msg, new_msg):93def is_identical_translation(existing_msg, new_msg):
80 """Is a new translation substantially the same as the existing one?94 """Is a new translation substantially the same as the existing one?
@@ -373,7 +387,7 @@
373 """387 """
374388
375 def __init__(self, translation_import_queue_entry,389 def __init__(self, translation_import_queue_entry,
376 importer, logger = None):390 importer, logger=None):
377 """Base constructor to set up common attributes and parse the imported391 """Base constructor to set up common attributes and parse the imported
378 file into a member variable (self.translation_file).392 file into a member variable (self.translation_file).
379393
@@ -578,6 +592,7 @@
578592
579 for message in self.translation_file.messages:593 for message in self.translation_file.messages:
580 if message.msgid_singular:594 if message.msgid_singular:
595 verify_message_newline_consistency(message)
581 self.importMessage(message)596 self.importMessage(message)
582597
583 return self.errors, self.translation_file.syntax_warnings598 return self.errors, self.translation_file.syntax_warnings
@@ -680,7 +695,7 @@
680 message._translations = None695 message._translations = None
681696
682 if len(message.flags) > 0:697 if len(message.flags) > 0:
683 flags_comment = u", "+u", ".join(message.flags)698 flags_comment = u", " + u", ".join(message.flags)
684 else:699 else:
685 flags_comment = u""700 flags_comment = u""
686701