Merge lp:~benji/launchpad/bug-742662 into lp:launchpad

Proposed by Benji York
Status: Rejected
Rejected by: Benji York
Proposed branch: lp:~benji/launchpad/bug-742662
Merge into: lp:launchpad
Diff against target: 313 lines (+146/-14)
5 files modified
lib/lp/translations/utilities/sanitize.py (+23/-7)
lib/lp/translations/utilities/tests/test_file_importer.py (+22/-4)
lib/lp/translations/utilities/tests/test_sanitize.py (+21/-0)
lib/lp/translations/utilities/tests/test_translation_importer.py (+63/-1)
lib/lp/translations/utilities/translation_import.py (+17/-2)
To merge this branch: bzr merge lp:~benji/launchpad/bug-742662
Reviewer Review Type Date Requested Status
Graham Binns (community) code Approve
Review via email: mp+74254@code.launchpad.net

Commit message

[r=gmb][bug=742662] report mixed newline usage when importing translations

Description of the change

Bug 742662 describes a scenario in which an imported translation
contains mixed newlines (\r as well as \n) and outlines three possible
fixes. This branch implements the option of disallowing such imports.
The software in place intended to do so, but the code to generate the
exception denoting a mixed newline condition was not being called. This
branch adds that code and tests for it.

The Sanitize class had a method that both normalized newlines as well as
reporting mixed newline violations. It didn't make sense to separate
out those two bits of functionality so I added a facade
(verifyNewlineConsistency) that only claims to validate newlines
(discarding the normalized result) and used that. I then added an
intermediary function (verify_message_newline_consistency) that applies
the newline verification to every string of natural-language text in a
translation message and then called that function when importing a
message (in importFile in
lib/lp/translations/utilities/translation_import.py).

Tests: the new tests that were added (and several related tests) can be
run with this command:

    bin/test -c -t test_sanitize -t test_file_importer \
        -t test_translation_importer

Lint: the "make lint" report is clean (after fixing some pre-existing
lint).

QA: I honestly don't know how to QA this. I'll have to ask Danilo or
one of the other translations guys. It might be that I can export the
template mentioned in the bug report
(https://translations.launchpad.net/ubuntu/natty/+source/alsa-utils/+pots/alsa-utils/bs/+translate)
and reimport it. It should generate an error message about mixed
newlines on reimport.

To post a comment you must log in.
Revision history for this message
Graham Binns (gmb) :
review: Approve (code)

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/lp/translations/utilities/sanitize.py'
2--- lib/lp/translations/utilities/sanitize.py 2010-09-29 15:46:26 +0000
3+++ lib/lp/translations/utilities/sanitize.py 2011-09-06 15:38:41 +0000
4@@ -4,6 +4,7 @@
5 __metaclass__ = type
6 __all__ = [
7 'MixedNewlineMarkersError',
8+ 'Sanitizer',
9 'sanitize_translations_from_webui',
10 'sanitize_translations_from_import',
11 ]
12@@ -47,29 +48,44 @@
13 self.newline_style = self._getNewlineStyle(
14 english_singular, 'Original')
15
16- def _getNewlineStyle(self, text, text_name):
17+ @classmethod
18+ def _getNewlineStyle(cls, text, text_name=None):
19 """Find out which newline style is used in text."""
20- error_message = (
21- "%s text (%r) mixes different newline markers." % (
22- text_name, text))
23 style = None
24 # To avoid confusing the single-character newline styles for mac and
25 # unix with the two-character windows one, remove the windows-style
26 # newlines from the text and use that text to search for the other
27 # two.
28- stripped_text = text.replace(self.windows_style, u'')
29+ stripped_text = text.replace(cls.windows_style, u'')
30 if text != stripped_text:
31 # Text contains windows style new lines.
32- style = self.windows_style
33+ style = cls.windows_style
34
35- for one_char_style in (self.mac_style, self.unix_style):
36+ for one_char_style in (cls.mac_style, cls.unix_style):
37 if one_char_style in stripped_text:
38 if style is not None:
39+ # If the text has a descriptive name (like "Original"),
40+ # then format it for inclusion in the error message.
41+ if text_name is not None:
42+ text_name += ' '
43+ else:
44+ text_name = ''
45+
46+ error_message = (
47+ "%stext (%r) mixes different newline markers." % (
48+ text_name, text))
49+
50 raise MixedNewlineMarkersError(error_message)
51 style = one_char_style
52
53 return style
54
55+ @classmethod
56+ def verifyNewlineConsistency(cls, text, text_name=None):
57+ """Raise a MixedNewlineMarkersError if the newlines are inconsistent.
58+ """
59+ cls._getNewlineStyle(text, text_name)
60+
61 def sanitize(self, translation_text):
62 """Return 'translation_text' or None after doing some sanitization.
63
64
65=== modified file 'lib/lp/translations/utilities/tests/test_file_importer.py'
66--- lib/lp/translations/utilities/tests/test_file_importer.py 2011-06-09 10:50:25 +0000
67+++ lib/lp/translations/utilities/tests/test_file_importer.py 2011-09-06 15:38:41 +0000
68@@ -31,6 +31,7 @@
69 ITranslationImportQueue,
70 )
71 from lp.translations.utilities.gettext_po_importer import GettextPOImporter
72+from lp.translations.utilities.sanitize import MixedNewlineMarkersError
73 from lp.translations.utilities.translation_common_format import (
74 TranslationMessageData,
75 )
76@@ -246,7 +247,7 @@
77 message.addTranslation(0, u'')
78
79 potmsgset = self.factory.makePOTMsgSet(
80- potemplate = importer.potemplate, sequence=50)
81+ potemplate=importer.potemplate, sequence=50)
82 translation = importer.storeTranslationsInDatabase(
83 message, potmsgset)
84 # No TranslationMessage is created.
85@@ -353,6 +354,24 @@
86 po_importer.potemplate.displayname),
87 'Did not create the correct comment for %s' % test_email)
88
89+ def test_FileImporter_rejects_mixed_newline_styles(self):
90+ # Files with mixed newlines (Mac, Unix, Windows) will generate an
91+ # error. See the Sanitizer tests for comprehensive tests (all
92+ # combinations, etc.).
93+ pofile_contents = dedent(r"""
94+ msgid ""
95+ msgstr ""
96+ "POT-Creation-Date: 2001-04-07 16:30+0500\n"
97+ "Content-Type: text/plain; charset=CHARSET\n"
98+ "Language: English\n"
99+
100+ msgid "bar"
101+ msgstr "mixed\rnewlines\n"
102+ """)
103+
104+ pot_importer = self._createPOTFileImporter(pofile_contents, True)
105+ self.assertRaises(MixedNewlineMarkersError, pot_importer.importFile)
106+
107 def test_getPersonByEmail_personless_account(self):
108 # An Account without a Person attached is a difficult case for
109 # _getPersonByEmail: it has to create the Person but re-use an
110@@ -566,7 +585,7 @@
111 def test_not_raises_OutdatedTranslationError_on_upstream_uploads(self):
112 queue_entry = self._make_queue_entry(True)
113 try:
114- importer = POFileImporter(queue_entry, GettextPOImporter(), None)
115+ POFileImporter(queue_entry, GettextPOImporter(), None)
116 except OutdatedTranslationError:
117 self.fail("OutdatedTranslationError raised.")
118
119@@ -574,7 +593,7 @@
120 queue_entry = self._make_queue_entry(True)
121 pofile = queue_entry.pofile
122 old_raw_header = pofile.header
123- importer = POFileImporter(queue_entry, GettextPOImporter(), None)
124+ POFileImporter(queue_entry, GettextPOImporter(), None)
125 self.assertEqual(old_raw_header, pofile.header)
126
127
128@@ -762,4 +781,3 @@
129 importer = POFileImporter(
130 entry, importers[TranslationFileFormat.PO], None)
131 self.assertTrue(importer.is_upstream_import_on_sourcepackage)
132-
133
134=== modified file 'lib/lp/translations/utilities/tests/test_sanitize.py'
135--- lib/lp/translations/utilities/tests/test_sanitize.py 2011-08-12 11:37:08 +0000
136+++ lib/lp/translations/utilities/tests/test_sanitize.py 2011-09-06 15:38:41 +0000
137@@ -158,6 +158,27 @@
138 MixedNewlineMarkersError,
139 sanitizer.normalizeNewlines, translation_text)
140
141+ def test_verifyNewlineConsistency_with_consistent_newlines(self):
142+ # Consistent newlines in the text will not raise an exception.
143+ translation_template = u"Text with %s consistent %s newlines."
144+ for translation_newline in self.newline_styles:
145+ translation_text = translation_template % (
146+ translation_newline, translation_newline)
147+ Sanitizer.verifyNewlineConsistency(translation_text)
148+
149+ def test_verifyNewlineConsistency_with_mixed_newlines(self):
150+ # Consistent newlines in the text will not raise an exception.
151+ translation_template = u"Text with %s mixed %s newlines."
152+ for translation_newline_1 in self.newline_styles:
153+ other_newlines = self.newline_styles[:]
154+ other_newlines.remove(translation_newline_1)
155+ for translation_newline_2 in other_newlines:
156+ translation_text = translation_template % (
157+ translation_newline_1, translation_newline_2)
158+ self.assertRaises(
159+ MixedNewlineMarkersError,
160+ Sanitizer.verifyNewlineConsistency, translation_text)
161+
162 def test_sanitize(self):
163 # Calling the Sanitizer object will apply all sanitization procedures.
164 sanitizer = Sanitizer(u"Text with\nnewline.")
165
166=== modified file 'lib/lp/translations/utilities/tests/test_translation_importer.py'
167--- lib/lp/translations/utilities/tests/test_translation_importer.py 2011-02-14 17:14:17 +0000
168+++ lib/lp/translations/utilities/tests/test_translation_importer.py 2011-09-06 15:38:41 +0000
169@@ -9,7 +9,10 @@
170
171 from canonical.testing.layers import LaunchpadZopelessLayer
172 from lp.services.log.logger import DevNullLogger
173-from lp.testing import TestCaseWithFactory
174+from lp.testing import (
175+ TestCase,
176+ TestCaseWithFactory,
177+ )
178 from lp.testing.matchers import Provides
179 from lp.translations.enums import RosettaImportStatus
180 from lp.translations.interfaces.translationfileformat import (
181@@ -21,12 +24,15 @@
182 from lp.translations.utilities.translation_common_format import (
183 TranslationMessageData,
184 )
185+from lp.translations.utilities.sanitize import MixedNewlineMarkersError
186 from lp.translations.utilities.translation_import import (
187 importers,
188 is_identical_translation,
189+ message_text_attributes,
190 POFileImporter,
191 POTFileImporter,
192 TranslationImporter,
193+ verify_message_newline_consistency,
194 )
195
196
197@@ -301,3 +307,59 @@
198 importer = POFileImporter(queue_entry, FakeParser(), DevNullLogger())
199 importer.importMessage(message)
200 self.assertEqual(old_file_references, potmsgset.filereferences)
201+
202+
203+class FakeMessage:
204+ msgid_singular = None
205+ msgid_plural = None
206+ singular_text = None
207+ plural_text = None
208+ translations = ()
209+
210+
211+class NewlineVerificationTests(TestCase):
212+ def test_attribute_ok(self):
213+ # If a message's attributes are None or strings without mixed
214+ # newlines, then no exception is raise.
215+ for attr_name in message_text_attributes:
216+ message = FakeMessage()
217+ message.msgid_singular = 'no mixed newlines here'
218+ # The next call doesn't raise an exception.
219+ verify_message_newline_consistency(message)
220+
221+ def test_attribute_not_ok(self):
222+ # If a message's attributes contain mixed newlines, an exception is
223+ # raised.
224+ for attr_name in message_text_attributes:
225+ message = FakeMessage()
226+ setattr(message, attr_name, 'mixed\rnewlines\n')
227+ e = self.assertRaises(
228+ MixedNewlineMarkersError,
229+ verify_message_newline_consistency, message)
230+ # The error message mentions the attribute name.
231+ self.assertEqual(
232+ e.args[0],
233+ "%s text ('mixed\\rnewlines\\n') " % attr_name +
234+ "mixes different newline markers.")
235+
236+ def test_translations_ok(self):
237+ # If a message's translations are None or strings without mixed
238+ # newlines, then no exception is raise.
239+ message = FakeMessage()
240+ message.translations = ['no mixed newlines here']
241+ # The next call doesn't raise an exception.
242+ verify_message_newline_consistency(message)
243+
244+ def test_translations_not_ok(self):
245+ # If a message's translations contain mixed newlines, an exception is
246+ # raised.
247+ message = FakeMessage()
248+ message.translations = ['mixed\rnewlines\n']
249+ e = self.assertRaises(
250+ MixedNewlineMarkersError,
251+ verify_message_newline_consistency, message)
252+ # The error message mentions the translation index ("0" in this case).
253+ self.assertEqual(
254+ e.args[0],
255+ "translation 0 text ('mixed\\rnewlines\\n') "
256+ "mixes different newline markers.")
257
258=== modified file 'lib/lp/translations/utilities/translation_import.py'
259--- lib/lp/translations/utilities/translation_import.py 2011-03-24 11:21:19 +0000
260+++ lib/lp/translations/utilities/translation_import.py 2011-09-06 15:38:41 +0000
261@@ -59,6 +59,7 @@
262 from lp.translations.utilities.mozilla_xpi_importer import MozillaXpiImporter
263 from lp.translations.utilities.sanitize import (
264 sanitize_translations_from_import,
265+ Sanitizer,
266 )
267 from lp.translations.utilities.translation_common_format import (
268 TranslationMessageData,
269@@ -75,6 +76,19 @@
270 TranslationFileFormat.XPI: MozillaXpiImporter(),
271 }
272
273+message_text_attributes = [
274+ 'msgid_singular', 'msgid_plural', 'singular_text', 'plural_text']
275+
276+
277+def verify_message_newline_consistency(message):
278+ for attr_name in message_text_attributes:
279+ value = getattr(message, attr_name)
280+ if value is not None:
281+ Sanitizer.verifyNewlineConsistency(value, attr_name)
282+ for index, translation in enumerate(message.translations):
283+ Sanitizer.verifyNewlineConsistency(
284+ translation, 'translation %d' % index)
285+
286
287 def is_identical_translation(existing_msg, new_msg):
288 """Is a new translation substantially the same as the existing one?
289@@ -373,7 +387,7 @@
290 """
291
292 def __init__(self, translation_import_queue_entry,
293- importer, logger = None):
294+ importer, logger=None):
295 """Base constructor to set up common attributes and parse the imported
296 file into a member variable (self.translation_file).
297
298@@ -578,6 +592,7 @@
299
300 for message in self.translation_file.messages:
301 if message.msgid_singular:
302+ verify_message_newline_consistency(message)
303 self.importMessage(message)
304
305 return self.errors, self.translation_file.syntax_warnings
306@@ -680,7 +695,7 @@
307 message._translations = None
308
309 if len(message.flags) > 0:
310- flags_comment = u", "+u", ".join(message.flags)
311+ flags_comment = u", " + u", ".join(message.flags)
312 else:
313 flags_comment = u""
314