Merge lp:~cjwatson/launchpad/destroy-ascii-smash into lp:launchpad
- destroy-ascii-smash
- Merge into devel
Status: | Merged |
---|---|
Merged at revision: | 17611 |
Proposed branch: | lp:~cjwatson/launchpad/destroy-ascii-smash |
Merge into: | lp:launchpad |
Diff against target: |
708 lines (+101/-302) 8 files modified
lib/lp/answers/doc/person.txt (+2/-3) lib/lp/answers/doc/questionsets.txt (+14/-15) lib/lp/app/stories/launchpad-root/site-search.txt (+1/-3) lib/lp/archiveuploader/tests/nascentupload-announcements.txt (+4/-7) lib/lp/archiveuploader/tests/safe_fix_maintainer.txt (+24/-22) lib/lp/archiveuploader/utils.py (+5/-10) lib/lp/services/encoding.py (+1/-200) lib/lp/soyuz/adapters/notification.py (+50/-42) |
To merge this branch: | bzr merge lp:~cjwatson/launchpad/destroy-ascii-smash |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
William Grant | code | Approve | |
Review via email: mp+264039@code.launchpad.net |
Commit message
Perform proper RFC2047-encoding of mail notification headers, and remove ascii_smash.
Description of the change
ascii_smash made some sense for shipit, where there were third-party limitations on label printing; but now that shipit is gone, there is no valid justification for using it anywhere in Launchpad. Its rendering of Arabic text is particularly terrible. I've replaced all uses of ascii_smash with either straightforward Unicode or RFC2047-encoding.
The Soyuz case requires delicate treatment. We need fix_maintainer to cope with the particular variant of RFC822 used in Maintainer and Changed-By fields, but it isn't necessarily possible to run the RFC822 output of fix_maintainer back through fix_maintainer, because it may output a parenthesised form rather than an angle-bracketed form. I avoided this problem by keeping track of the e-mail address on its own in a few more places, which is enough for a Person lookup.
William Grant (wgrant) : | # |
Colin Watson (cjwatson) : | # |
Preview Diff
1 | === modified file 'lib/lp/answers/doc/person.txt' |
2 | --- lib/lp/answers/doc/person.txt 2013-05-01 00:23:31 +0000 |
3 | +++ lib/lp/answers/doc/person.txt 2015-07-07 13:35:34 +0000 |
4 | @@ -170,13 +170,12 @@ |
5 | But Carlos has one. |
6 | |
7 | # Because not everyone uses a real editor <wink> |
8 | - >>> from lp.services.encoding import ascii_smash |
9 | >>> carlos_raw = personset.getByName('carlos') |
10 | >>> carlos = IQuestionsPerson(carlos_raw) |
11 | >>> for question in carlos.searchQuestions( |
12 | ... language=(english, spanish)): |
13 | - ... print ascii_smash(question.title), question.language.code |
14 | - Problema al recompilar kernel con soporte smp (doble-nucleo) es |
15 | + ... print question.title, question.language.code |
16 | + Problema al recompilar kernel con soporte smp (doble-núcleo) es |
17 | |
18 | |
19 | Questions needing attention |
20 | |
21 | === modified file 'lib/lp/answers/doc/questionsets.txt' |
22 | --- lib/lp/answers/doc/questionsets.txt 2013-05-01 00:23:31 +0000 |
23 | +++ lib/lp/answers/doc/questionsets.txt 2015-07-07 13:35:34 +0000 |
24 | @@ -48,16 +48,15 @@ |
25 | regular full text algorithm. |
26 | |
27 | # Because not everyone uses a real editor <wink> |
28 | - >>> from lp.services.encoding import ascii_smash |
29 | >>> for question in question_set.searchQuestions(search_text=u'firefox'): |
30 | - ... print ascii_smash(question.title), question.target.displayname |
31 | - Problemas de Impressao no Firefox Mozilla Firefox |
32 | + ... print question.title, question.target.displayname |
33 | + Problemas de Impressão no Firefox Mozilla Firefox |
34 | Firefox loses focus and gets stuck Mozilla Firefox |
35 | Firefox cannot render Bank Site Mozilla Firefox |
36 | mailto: problem in webpage mozilla-firefox in Ubuntu |
37 | Newly installed plug-in doesn't seem to be used Mozilla Firefox |
38 | Problem showing the SVG demo on W3C site Mozilla Firefox |
39 | - AINKAFSEEN ALEFLAMTEHGHAINYEHYEHREHALEFTEH ... Ubuntu |
40 | + عكس التغييرات غير المحفوظة للمستن؟ Ubuntu |
41 | |
42 | |
43 | Status |
44 | @@ -93,8 +92,8 @@ |
45 | >>> from lp.services.worlddata.interfaces.language import ILanguageSet |
46 | >>> spanish = getUtility(ILanguageSet)['es'] |
47 | >>> for t in question_set.searchQuestions(language=spanish): |
48 | - ... print ascii_smash(t.title) |
49 | - Problema al recompilar kernel con soporte smp (doble-nucleo) |
50 | + ... print t.title |
51 | + Problema al recompilar kernel con soporte smp (doble-núcleo) |
52 | |
53 | |
54 | Combinations |
55 | @@ -106,14 +105,14 @@ |
56 | >>> for question in question_set.searchQuestions( |
57 | ... search_text=u'firefox', |
58 | ... status=(QuestionStatus.OPEN, QuestionStatus.INVALID)): |
59 | - ... print ascii_smash(question.title), question.status.title, ( |
60 | + ... print question.title, question.status.title, ( |
61 | ... question.target.displayname) |
62 | - Problemas de Impressao no Firefox Open Mozilla Firefox |
63 | + Problemas de Impressão no Firefox Open Mozilla Firefox |
64 | Firefox is slow and consumes too much ... mozilla-firefox in Ubuntu |
65 | Firefox loses focus and gets stuck Open Mozilla Firefox |
66 | Firefox cannot render Bank Site Open Mozilla Firefox |
67 | Problem showing the SVG demo on W3C site Open Mozilla Firefox |
68 | - AINKAFSEEN ALEFLAMTEHGHAINYEHYEHREHALEFTEH ... Ubuntu |
69 | + عكس التغييرات غير المحفوظة للمستن؟ Open Ubuntu |
70 | |
71 | |
72 | Sort order |
73 | @@ -126,24 +125,24 @@ |
74 | >>> from lp.answers.enums import QuestionSort |
75 | >>> for question in question_set.searchQuestions( |
76 | ... search_text=u'firefox', sort=QuestionSort.OLDEST_FIRST): |
77 | - ... print question.id, ascii_smash(question.title), ( |
78 | + ... print question.id, question.title, ( |
79 | ... question.target.displayname) |
80 | - 14 AINKAFSEEN ALEFLAMTEHGHAINYEHYEHREHALEFTEH ... Ubuntu |
81 | + 14 عكس التغييرات غير المحفوظة للمستن؟ Ubuntu |
82 | 1 Firefox cannot render Bank Site Mozilla Firefox |
83 | 2 Problem showing the SVG demo on W3C site Mozilla Firefox |
84 | 4 Firefox loses focus and gets stuck Mozilla Firefox |
85 | 6 Newly installed plug-in doesn't seem to be used Mozilla Firefox |
86 | 9 mailto: problem in webpage mozilla-firefox in Ubuntu |
87 | - 13 Problemas de Impressao no Firefox Mozilla Firefox |
88 | + 13 Problemas de Impressão no Firefox Mozilla Firefox |
89 | |
90 | When no text search is done, the default sort order is by newest first. |
91 | |
92 | >>> for question in question_set.searchQuestions( |
93 | ... status=QuestionStatus.OPEN)[:5]: |
94 | - ... print question.id, ascii_smash(question.title), ( |
95 | + ... print question.id, question.title, ( |
96 | ... question.target.displayname) |
97 | - 13 Problemas de Impressao no Firefox Mozilla Firefox |
98 | - 12 Problema al recompilar kernel con soporte smp (doble-nucleo) Ubuntu |
99 | + 13 Problemas de Impressão no Firefox Mozilla Firefox |
100 | + 12 Problema al recompilar kernel con soporte smp (doble-núcleo) Ubuntu |
101 | 11 Continue playing after shutdown Ubuntu |
102 | 5 Installation failed Ubuntu |
103 | 4 Firefox loses focus and gets stuck Mozilla Firefox |
104 | |
105 | === modified file 'lib/lp/app/stories/launchpad-root/site-search.txt' |
106 | --- lib/lp/app/stories/launchpad-root/site-search.txt 2013-09-27 04:13:23 +0000 |
107 | +++ lib/lp/app/stories/launchpad-root/site-search.txt 2015-07-07 13:35:34 +0000 |
108 | @@ -5,8 +5,6 @@ |
109 | specific search with Launchpad's prominent objects (projects, bugs, |
110 | teams, etc.). |
111 | |
112 | - >>> from lp.services.encoding import ascii_smash |
113 | - |
114 | # Our very helpful function for printing all the page results. |
115 | |
116 | >>> def print_search_results(contents=None): |
117 | @@ -14,7 +12,7 @@ |
118 | ... contents = anon_browser.contents |
119 | ... tag = find_tag_by_id(contents, 'search-results') |
120 | ... if tag: |
121 | - ... print ascii_smash(extract_text(tag)) |
122 | + ... print extract_text(tag) |
123 | |
124 | # Another helper to make searching convenient. |
125 | |
126 | |
127 | === modified file 'lib/lp/archiveuploader/tests/nascentupload-announcements.txt' |
128 | --- lib/lp/archiveuploader/tests/nascentupload-announcements.txt 2014-11-08 23:53:17 +0000 |
129 | +++ lib/lp/archiveuploader/tests/nascentupload-announcements.txt 2015-07-07 13:35:34 +0000 |
130 | @@ -512,7 +512,7 @@ |
131 | DEBUG * Changer using non-preferred email |
132 | DEBUG |
133 | DEBUG Date: Tue, 25 Apr 2006 10:36:14 -0300 |
134 | - DEBUG Changed-By: Celso R. Providelo <cprov@ubuntu.com> |
135 | + DEBUG Changed-By: cprov@ubuntu.com (Celso R. Providelo) |
136 | DEBUG Maintainer: Launchpad team <launchpad@lists.canonical.com> |
137 | DEBUG http://launchpad.dev/ubuntu/+source/bar/1.0-4 |
138 | DEBUG |
139 | @@ -679,8 +679,7 @@ |
140 | 0 |
141 | |
142 | Uploads with UTF-8 characters in email addresses in the changes file are |
143 | -permitted, but converted to ASCII, which is a limitation of the mailer. |
144 | -However, UTF-8 in the mail content is preserved. |
145 | +permitted, but RFC2047-encoded. UTF-8 in the mail content is preserved. |
146 | |
147 | >>> hoary.status = SeriesStatus.DEVELOPMENT |
148 | >>> anything_policy = getPolicy( |
149 | @@ -701,10 +700,8 @@ |
150 | >>> len(msgs) |
151 | 2 |
152 | |
153 | -"Cihar" should actually be "Čihař" but the mailer will convert to ASCII. |
154 | - |
155 | - >>> [message['From'] for message in msgs] |
156 | - ['Root <root@localhost>', 'Non-ascii changed-by Cihar |
157 | + >>> [message['From'].replace('\n ', ' ') for message in msgs] |
158 | + ['Root <root@localhost>', '=?utf-8?q?Non-ascii_changed-by_=C4=8Ciha=C5=99?= |
159 | <daniel.silverstone@canonical.com>'] |
160 | |
161 | UTF-8 text in the changes file that is sent on the email is preserved |
162 | |
163 | === modified file 'lib/lp/archiveuploader/tests/safe_fix_maintainer.txt' |
164 | --- lib/lp/archiveuploader/tests/safe_fix_maintainer.txt 2010-07-24 09:12:37 +0000 |
165 | +++ lib/lp/archiveuploader/tests/safe_fix_maintainer.txt 2015-07-07 13:35:34 +0000 |
166 | @@ -1,41 +1,43 @@ |
167 | -Test some utils method inheritaded from DAK: |
168 | +Test some utils method inherited from DAK: |
169 | |
170 | safe_fix_maintainer() is a function used to sanitise the |
171 | identification fields coming from the Debian control files (changes |
172 | and dsc). It allows safe unicode and non-unicode inputs. |
173 | |
174 | - >>> from lp.archiveuploader.utils import ( |
175 | - ... safe_fix_maintainer) |
176 | + >>> from lp.archiveuploader.utils import safe_fix_maintainer |
177 | |
178 | - >>> maintainer_field = 'maintainer' |
179 | - >>> changer_field = 'changed-by' |
180 | + >>> maintainer_field = 'maintainer' |
181 | + >>> changer_field = 'changed-by' |
182 | |
183 | Pure ASCII content using the two available fieldname (pretty much the same) |
184 | |
185 | - >>> content = 'Hello World <hello@world.com>' |
186 | - >>> safe_fix_maintainer(content, maintainer_field) |
187 | - ('Hello World <hello@world.com>', 'Hello World <hello@world.com>', 'Hello World', 'hello@world.com') |
188 | + >>> content = 'Hello World <hello@world.com>' |
189 | + >>> safe_fix_maintainer(content, maintainer_field) |
190 | + ('Hello World <hello@world.com>', 'Hello World <hello@world.com>', |
191 | + 'Hello World', 'hello@world.com') |
192 | |
193 | - >>> content = 'Hello World <hello@world.com>' |
194 | - >>> safe_fix_maintainer(content, changer_field) |
195 | - ('Hello World <hello@world.com>', 'Hello World <hello@world.com>', 'Hello World', 'hello@world.com') |
196 | + >>> content = 'Hello World <hello@world.com>' |
197 | + >>> safe_fix_maintainer(content, changer_field) |
198 | + ('Hello World <hello@world.com>', 'Hello World <hello@world.com>', |
199 | + 'Hello World', 'hello@world.com') |
200 | |
201 | |
202 | Passing Unicode: |
203 | |
204 | - # XXX cprov 2006-02-20 bug=32148: Not sure if it is working properly, |
205 | - # at least doesn't raise any exception like in bug #32148. |
206 | + # XXX cprov 2006-02-20 bug=32148: Not sure if it is working properly, |
207 | + # at least doesn't raise any exception like in bug #32148. |
208 | |
209 | - >>> content = u'Rapha\xc3l Pinson <raphink@ubuntu.com>' |
210 | - >>> safe_fix_maintainer(content, maintainer_field) |
211 | - ('RaphaAl Pinson <raphink@ubuntu.com>', 'RaphaAl Pinson <raphink@ubuntu.com>', 'RaphaAl Pinson', 'raphink@ubuntu.com') |
212 | + >>> content = u'Rapha\xc3l Pinson <raphink@ubuntu.com>' |
213 | + >>> safe_fix_maintainer(content, maintainer_field) |
214 | + ('Rapha\xc3\x83l Pinson <raphink@ubuntu.com>', |
215 | + '=?utf-8?q?Rapha=C3=83l_Pinson?= <raphink@ubuntu.com>', |
216 | + 'Rapha\xc3\x83l Pinson', 'raphink@ubuntu.com') |
217 | |
218 | |
219 | Passing latin encoded string: |
220 | |
221 | - >>> content = 'Rapha\xebl Pinson <raphink@ubuntu.com>' |
222 | - >>> safe_fix_maintainer(content, maintainer_field) |
223 | - ('Raphael Pinson <raphink@ubuntu.com>', 'Raphael Pinson <raphink@ubuntu.com>', 'Raphael Pinson', 'raphink@ubuntu.com') |
224 | - |
225 | - |
226 | - |
227 | + >>> content = 'Rapha\xebl Pinson <raphink@ubuntu.com>' |
228 | + >>> safe_fix_maintainer(content, maintainer_field) |
229 | + ('Rapha\xc3\xabl Pinson <raphink@ubuntu.com>', |
230 | + '=?utf-8?q?Rapha=C3=ABl_Pinson?= <raphink@ubuntu.com>', |
231 | + 'Rapha\xc3\xabl Pinson', 'raphink@ubuntu.com') |
232 | |
233 | === modified file 'lib/lp/archiveuploader/utils.py' |
234 | --- lib/lp/archiveuploader/utils.py 2015-03-13 19:05:50 +0000 |
235 | +++ lib/lp/archiveuploader/utils.py 2015-07-07 13:35:34 +0000 |
236 | @@ -1,4 +1,4 @@ |
237 | -# Copyright 2009-2012 Canonical Ltd. This software is licensed under the |
238 | +# Copyright 2009-2015 Canonical Ltd. This software is licensed under the |
239 | # GNU Affero General Public License version 3 (see the file LICENSE). |
240 | |
241 | """Archive uploader utilities.""" |
242 | @@ -37,10 +37,7 @@ |
243 | import signal |
244 | import subprocess |
245 | |
246 | -from lp.services.encoding import ( |
247 | - ascii_smash, |
248 | - guess as guess_encoding, |
249 | - ) |
250 | +from lp.services.encoding import guess as guess_encoding |
251 | from lp.soyuz.enums import BinaryPackageFileType |
252 | |
253 | |
254 | @@ -269,15 +266,13 @@ |
255 | def safe_fix_maintainer(content, fieldname): |
256 | """Wrapper for fix_maintainer() to handle unicode and string argument. |
257 | |
258 | - It verifies the content type and transform it in a unicode with guess() |
259 | - before call ascii_smash(). Then we can safely call fix_maintainer(). |
260 | + It verifies the content type and transforms it to a unicode with |
261 | + guess(). Then we can safely call fix_maintainer(). |
262 | """ |
263 | if type(content) != unicode: |
264 | content = guess_encoding(content) |
265 | |
266 | - content = ascii_smash(content) |
267 | - |
268 | - return fix_maintainer(content, fieldname) |
269 | + return fix_maintainer(content.encode("utf-8"), fieldname) |
270 | |
271 | |
272 | def extract_dpkg_source(dsc_filepath, target, vendor=None): |
273 | |
274 | === modified file 'lib/lp/services/encoding.py' |
275 | --- lib/lp/services/encoding.py 2011-12-19 23:38:16 +0000 |
276 | +++ lib/lp/services/encoding.py 2015-07-07 13:35:34 +0000 |
277 | @@ -1,20 +1,17 @@ |
278 | -# Copyright 2009 Canonical Ltd. This software is licensed under the |
279 | +# Copyright 2009-2015 Canonical Ltd. This software is licensed under the |
280 | # GNU Affero General Public License version 3 (see the file LICENSE). |
281 | |
282 | """Character encoding utilities""" |
283 | |
284 | __metaclass__ = type |
285 | __all__ = [ |
286 | - 'ascii_smash', |
287 | 'escape_nonascii_uniquely', |
288 | 'guess', |
289 | 'is_ascii_only', |
290 | ] |
291 | |
292 | import codecs |
293 | -from cStringIO import StringIO |
294 | import re |
295 | -import unicodedata |
296 | |
297 | |
298 | _boms = [ |
299 | @@ -156,202 +153,6 @@ |
300 | return unicode(s, 'ISO-8859-1', 'replace') |
301 | |
302 | |
303 | -def ascii_smash(unicode_string): |
304 | - """Attempt to convert the Unicode string, possibly containing accents, |
305 | - to an ASCII string. |
306 | - |
307 | - This is used for generating shipping labels because our shipping company |
308 | - can only deal with ASCII despite being European :-/ |
309 | - |
310 | - ASCII goes through just fine |
311 | - |
312 | - >>> ascii_smash(u"Hello") |
313 | - 'Hello' |
314 | - |
315 | - Latin-1 accented characters have their accents stripped. |
316 | - |
317 | - >>> ascii_smash(u"Ol\N{LATIN SMALL LETTER E WITH ACUTE}") |
318 | - 'Ole' |
319 | - >>> ascii_smash(u"\N{LATIN CAPITAL LETTER A WITH RING ABOVE}iste") |
320 | - 'Aiste' |
321 | - >>> ascii_smash( |
322 | - ... u"\N{LATIN SMALL LETTER AE}" |
323 | - ... u"\N{LATIN SMALL LETTER I WITH GRAVE}" |
324 | - ... u"\N{LATIN SMALL LETTER O WITH STROKE}" |
325 | - ... u"\N{LATIN SMALL LETTER U WITH CIRCUMFLEX}" |
326 | - ... ) |
327 | - 'aeiou' |
328 | - >>> ascii_smash( |
329 | - ... u"\N{LATIN CAPITAL LETTER AE}" |
330 | - ... u"\N{LATIN CAPITAL LETTER I WITH GRAVE}" |
331 | - ... u"\N{LATIN CAPITAL LETTER O WITH STROKE}" |
332 | - ... u"\N{LATIN CAPITAL LETTER U WITH TILDE}" |
333 | - ... ) |
334 | - 'AEIOU' |
335 | - >>> ascii_smash(u"Stra\N{LATIN SMALL LETTER SHARP S}e") |
336 | - 'Strasse' |
337 | - |
338 | - Moving further into Eastern Europe we get more odd letters |
339 | - |
340 | - >>> ascii_smash( |
341 | - ... u"\N{LATIN CAPITAL LETTER Z WITH CARON}" |
342 | - ... u"ivkovi\N{LATIN SMALL LETTER C WITH CARON}" |
343 | - ... ) |
344 | - 'Zivkovic' |
345 | - |
346 | - >>> ascii_smash(u"\N{LATIN CAPITAL LIGATURE OE}\N{LATIN SMALL LIGATURE OE}") |
347 | - 'OEoe' |
348 | - |
349 | - """ |
350 | - out = StringIO() |
351 | - for char in unicode_string: |
352 | - out.write(ascii_char_smash(char)) |
353 | - return out.getvalue() |
354 | - |
355 | - |
356 | -def ascii_char_smash(char): |
357 | - """Smash a single Unicode character into an ASCII representation. |
358 | - |
359 | - >>> ascii_char_smash(u"\N{KATAKANA LETTER SMALL A}") |
360 | - 'a' |
361 | - >>> ascii_char_smash(u"\N{KATAKANA LETTER A}") |
362 | - 'A' |
363 | - >>> ascii_char_smash(u"\N{KATAKANA LETTER KA}") |
364 | - 'KA' |
365 | - >>> ascii_char_smash(u"\N{HIRAGANA LETTER SMALL A}") |
366 | - 'a' |
367 | - >>> ascii_char_smash(u"\N{HIRAGANA LETTER A}") |
368 | - 'A' |
369 | - >>> ascii_char_smash(u"\N{BOPOMOFO LETTER ANG}") |
370 | - 'ANG' |
371 | - >>> ascii_char_smash(u"\N{LATIN CAPITAL LETTER H WITH STROKE}") |
372 | - 'H' |
373 | - >>> ascii_char_smash(u"\N{LATIN SMALL LETTER LONG S}") |
374 | - 's' |
375 | - >>> ascii_char_smash(u"\N{LATIN CAPITAL LETTER THORN}") |
376 | - 'TH' |
377 | - >>> ascii_char_smash(u"\N{LATIN SMALL LETTER THORN}") |
378 | - 'th' |
379 | - >>> ascii_char_smash(u"\N{LATIN CAPITAL LETTER I WITH OGONEK}") |
380 | - 'I' |
381 | - >>> ascii_char_smash(u"\N{LATIN CAPITAL LETTER AE}") |
382 | - 'AE' |
383 | - >>> ascii_char_smash(u"\N{LATIN CAPITAL LETTER A WITH DIAERESIS}") |
384 | - 'Ae' |
385 | - >>> ascii_char_smash(u"\N{LATIN SMALL LETTER A WITH DIAERESIS}") |
386 | - 'ae' |
387 | - >>> ascii_char_smash(u"\N{LATIN CAPITAL LETTER O WITH DIAERESIS}") |
388 | - 'Oe' |
389 | - >>> ascii_char_smash(u"\N{LATIN SMALL LETTER O WITH DIAERESIS}") |
390 | - 'oe' |
391 | - >>> ascii_char_smash(u"\N{LATIN CAPITAL LETTER U WITH DIAERESIS}") |
392 | - 'Ue' |
393 | - >>> ascii_char_smash(u"\N{LATIN SMALL LETTER U WITH DIAERESIS}") |
394 | - 'ue' |
395 | - >>> ascii_char_smash(u"\N{LATIN SMALL LETTER SHARP S}") |
396 | - 'ss' |
397 | - |
398 | - Latin-1 and other symbols are lost |
399 | - |
400 | - >>> ascii_char_smash(u"\N{POUND SIGN}") |
401 | - '' |
402 | - |
403 | - Unless they also happen to be letters of some kind, such as greek |
404 | - |
405 | - >>> ascii_char_smash(u"\N{MICRO SIGN}") |
406 | - 'mu' |
407 | - |
408 | - Fractions |
409 | - |
410 | - >>> ascii_char_smash(u"\N{VULGAR FRACTION ONE HALF}") |
411 | - '1/2' |
412 | - |
413 | - """ |
414 | - mapping = { |
415 | - u"\N{LATIN CAPITAL LETTER AE}": "AE", |
416 | - u"\N{LATIN SMALL LETTER AE}": "ae", |
417 | - |
418 | - u"\N{LATIN CAPITAL LETTER A WITH DIAERESIS}": "Ae", |
419 | - u"\N{LATIN SMALL LETTER A WITH DIAERESIS}": "ae", |
420 | - |
421 | - u"\N{LATIN CAPITAL LETTER O WITH DIAERESIS}": "Oe", |
422 | - u"\N{LATIN SMALL LETTER O WITH DIAERESIS}": "oe", |
423 | - |
424 | - u"\N{LATIN CAPITAL LETTER U WITH DIAERESIS}": "Ue", |
425 | - u"\N{LATIN SMALL LETTER U WITH DIAERESIS}": "ue", |
426 | - |
427 | - u"\N{LATIN SMALL LETTER SHARP S}": "ss", |
428 | - |
429 | - u"\N{LATIN CAPITAL LETTER THORN}": "TH", |
430 | - u"\N{LATIN SMALL LETTER THORN}": "th", |
431 | - |
432 | - u"\N{FRACTION SLASH}": "/", |
433 | - u"\N{MULTIPLICATION SIGN}": "x", |
434 | - |
435 | - u"\N{KATAKANA-HIRAGANA DOUBLE HYPHEN}": "=", |
436 | - } |
437 | - |
438 | - # Pass through ASCII |
439 | - if ord(char) < 127: |
440 | - return char |
441 | - |
442 | - # Handle manual mappings |
443 | - if mapping.has_key(char): |
444 | - return mapping[char] |
445 | - |
446 | - # Regress to decomposed form and recurse if necessary. |
447 | - decomposed = unicodedata.normalize("NFKD", char) |
448 | - if decomposed != char: |
449 | - out = StringIO() |
450 | - for char in decomposed: |
451 | - out.write(ascii_char_smash(char)) |
452 | - return out.getvalue() |
453 | - |
454 | - # Handle whitespace |
455 | - if char.isspace(): |
456 | - return " " |
457 | - |
458 | - # Handle digits |
459 | - if char.isdigit(): |
460 | - return unicodedata.digit(char) |
461 | - |
462 | - # Handle decimal (probably pointless given isdigit above) |
463 | - if char.isdecimal(): |
464 | - return unicodedata.decimal(char) |
465 | - |
466 | - # Handle numerics, such as 1/2 |
467 | - if char.isnumeric(): |
468 | - formatted = "%f" % unicodedata.numeric(char) |
469 | - # Strip leading and trailing 0 |
470 | - return formatted.strip("0") |
471 | - |
472 | - # Ignore unprintables, such as the accents we denormalized |
473 | - if not char.isalnum(): |
474 | - return "" |
475 | - |
476 | - # Return modified latin characters as just the latin part. |
477 | - name = unicodedata.name(char) |
478 | - |
479 | - match = re.search("LATIN CAPITAL LIGATURE (\w+)", name) |
480 | - if match is not None: |
481 | - return match.group(1) |
482 | - |
483 | - match = re.search("LATIN SMALL LIGATURE (\w+)", name) |
484 | - if match is not None: |
485 | - return match.group(1).lower() |
486 | - |
487 | - match = re.search("(?:LETTER SMALL|SMALL LETTER) (\w+)", name) |
488 | - if match is not None: |
489 | - return match.group(1).lower() |
490 | - |
491 | - match = re.search("LETTER (\w+)", name) |
492 | - if match is not None: |
493 | - return match.group(1) |
494 | - |
495 | - # Something we can't represent. Return empty string. |
496 | - return "" |
497 | - |
498 | - |
499 | def escape_nonascii_uniquely(bogus_string): |
500 | """Replace non-ascii characters with a hex representation. |
501 | |
502 | |
503 | === modified file 'lib/lp/soyuz/adapters/notification.py' |
504 | --- lib/lp/soyuz/adapters/notification.py 2015-03-13 19:05:50 +0000 |
505 | +++ lib/lp/soyuz/adapters/notification.py 2015-07-07 13:35:34 +0000 |
506 | @@ -1,4 +1,4 @@ |
507 | -# Copyright 2011-2014 Canonical Ltd. This software is licensed under the |
508 | +# Copyright 2011-2015 Canonical Ltd. This software is licensed under the |
509 | # GNU Affero General Public License version 3 (see the file LICENSE). |
510 | |
511 | """Notification for uploads and copies.""" |
512 | @@ -27,10 +27,7 @@ |
513 | from lp.registry.interfaces.person import IPersonSet |
514 | from lp.registry.interfaces.pocket import PackagePublishingPocket |
515 | from lp.services.config import config |
516 | -from lp.services.encoding import ( |
517 | - ascii_smash, |
518 | - guess as guess_encoding, |
519 | - ) |
520 | +from lp.services.encoding import guess as guess_encoding |
521 | from lp.services.mail.helpers import get_email_template |
522 | from lp.services.mail.sendmail import ( |
523 | format_address, |
524 | @@ -232,9 +229,11 @@ |
525 | |
526 | info = fetch_information(spr, bprs, changes) |
527 | from_addr = info['changedby'] |
528 | + from_email = info['changedby_email'] |
529 | if announce_from_person is not None: |
530 | if announce_from_person.preferredemail is not None: |
531 | from_addr = format_address_for_person(announce_from_person) |
532 | + from_email = announce_from_person.preferredemail.email |
533 | |
534 | # If we're sending an acceptance notification for a non-PPA upload, |
535 | # announce if possible. Avoid announcing backports, binary-only |
536 | @@ -243,7 +242,7 @@ |
537 | and not archive.is_ppa |
538 | and pocket != PackagePublishingPocket.BACKPORTS |
539 | and not (pocket == PackagePublishingPocket.SECURITY and spr is None) |
540 | - and not is_auto_sync_upload(spr, bprs, pocket, from_addr)): |
541 | + and not is_auto_sync_upload(spr, bprs, pocket, from_email)): |
542 | name = None |
543 | bcc_addr = None |
544 | if spr: |
545 | @@ -301,17 +300,17 @@ |
546 | # Some syncs (e.g. from Debian) will involve packages whose |
547 | # changed-by person was auto-created in LP and hence does not have a |
548 | # preferred email address set. We'll get a None here. |
549 | - changedby_person = email_to_person(info['changedby']) |
550 | + changedby_person = email_to_person(info['changedby_email']) |
551 | |
552 | if blamer is not None and blamer != changedby_person: |
553 | signer_signature = person_to_email(blamer) |
554 | if signer_signature != info['changedby']: |
555 | information['SIGNER'] = '\nSigned-By: %s' % signer_signature |
556 | # Add maintainer if present and different from changed-by. |
557 | - maintainer = info['maintainer'] |
558 | - changedby = info['changedby'] |
559 | - if maintainer and maintainer != changedby: |
560 | - information['MAINTAINER'] = '\nMaintainer: %s' % maintainer |
561 | + maintainer_displayname = info['maintainer_displayname'] |
562 | + if (maintainer_displayname and |
563 | + maintainer_displayname != changedby_displayname): |
564 | + information['MAINTAINER'] = '\nMaintainer: %s' % maintainer_displayname |
565 | return get_template(archive, action) % information |
566 | |
567 | |
568 | @@ -360,24 +359,15 @@ |
569 | config.uploader.default_sender_name, |
570 | config.uploader.default_sender_address) |
571 | |
572 | - # `sendmail`, despite handling unicode message bodies, can't |
573 | - # cope with non-ascii sender/recipient addresses, so ascii_smash |
574 | - # is used on all addresses. |
575 | - |
576 | # All emails from here have a Bcc to the default recipient. |
577 | bcc_text = format_address( |
578 | config.uploader.default_recipient_name, |
579 | config.uploader.default_recipient_address) |
580 | if bcc: |
581 | bcc_text = "%s, %s" % (bcc_text, bcc) |
582 | - extra_headers['Bcc'] = ascii_smash(bcc_text) |
583 | + extra_headers['Bcc'] = bcc_text |
584 | |
585 | - recipients = ascii_smash(", ".join(to_addrs)) |
586 | - if isinstance(from_addr, unicode): |
587 | - # ascii_smash only works on unicode strings. |
588 | - from_addr = ascii_smash(from_addr) |
589 | - else: |
590 | - from_addr.encode('ascii') |
591 | + recipients = ", ".join(to_addrs) |
592 | |
593 | if dry_run and logger is not None: |
594 | debug(logger, "Would have sent a mail:") |
595 | @@ -471,8 +461,8 @@ |
596 | candidate_recipients = [blamer] |
597 | info = fetch_information(spr, bprs, changes) |
598 | |
599 | - changer = email_to_person(info['changedby']) |
600 | - maintainer = email_to_person(info['maintainer']) |
601 | + changer = email_to_person(info['changedby_email']) |
602 | + maintainer = email_to_person(info['maintainer_email']) |
603 | |
604 | if blamer is None and not archive.is_copy: |
605 | debug(logger, "Changes file is unsigned; adding changer as recipient.") |
606 | @@ -565,23 +555,16 @@ |
607 | return summary |
608 | |
609 | |
610 | -def email_to_person(fullemail): |
611 | - """Return an `IPerson` given an RFC2047 email address. |
612 | +def email_to_person(email): |
613 | + """Return an `IPerson` given an email address (without a name). |
614 | |
615 | - :param fullemail: Potential email address. |
616 | + :param email: Potential email address. |
617 | :return: `IPerson` with the given email address. None if there |
618 | - isn't one, or if `fullemail` isn't a proper email address. |
619 | + isn't one, or if `email` is None. |
620 | """ |
621 | - if not fullemail: |
622 | - return None |
623 | - |
624 | - try: |
625 | - # The 2nd arg to s_f_m() doesn't matter as it won't fail since every- |
626 | - # thing will have already parsed at this point. |
627 | - rfc822, rfc2047, name, email = safe_fix_maintainer(fullemail, "email") |
628 | - return getUtility(IPersonSet).getByEmail(email) |
629 | - except ParseMaintError: |
630 | - return None |
631 | + if not email: |
632 | + return None |
633 | + return getUtility(IPersonSet).getByEmail(email) |
634 | |
635 | |
636 | def person_to_email(person): |
637 | @@ -591,6 +574,25 @@ |
638 | return format_address_for_person(person) |
639 | |
640 | |
641 | +def fix_email(fullemail, field_name): |
642 | + """Turn an email address from .changes into various useful forms. |
643 | + |
644 | + The input address may be None, or anything that `fix_maintainer` |
645 | + understands. |
646 | + |
647 | + :return: A tuple of (RFC2047-compatible address, Unicode |
648 | + RFC822-compatible address, email). |
649 | + """ |
650 | + if not fullemail: |
651 | + return None, None, None |
652 | + |
653 | + try: |
654 | + rfc822, rfc2047, _, email = safe_fix_maintainer(fullemail, field_name) |
655 | + return rfc2047, rfc822.decode('utf-8'), email |
656 | + except ParseMaintError: |
657 | + return None, None, None |
658 | + |
659 | + |
660 | def is_auto_sync_upload(spr, bprs, pocket, changed_by_email): |
661 | """Return True if this is a (Debian) auto sync upload. |
662 | |
663 | @@ -609,17 +611,19 @@ |
664 | def fetch_information(spr, bprs, changes, previous_version=None): |
665 | changedby = None |
666 | changedby_displayname = None |
667 | + changedby_email = None |
668 | maintainer = None |
669 | maintainer_displayname = None |
670 | + maintainer_email = None |
671 | |
672 | if changes: |
673 | changesfile = ChangesFile.formatChangesComment( |
674 | sanitize_string(changes.get('Changes'))) |
675 | date = changes.get('Date') |
676 | - changedby = sanitize_string(changes.get('Changed-By')) |
677 | - maintainer = sanitize_string(changes.get('Maintainer')) |
678 | - changedby_displayname = changedby |
679 | - maintainer_displayname = maintainer |
680 | + changedby, changedby_displayname, changedby_email = fix_email( |
681 | + changes.get('Changed-By'), 'Changed-By') |
682 | + maintainer, maintainer_displayname, maintainer_email = fix_email( |
683 | + changes.get('Maintainer'), 'Maintainer') |
684 | elif spr or bprs: |
685 | if not spr and bprs: |
686 | spr = bprs[0].build.source_package_release |
687 | @@ -631,10 +635,12 @@ |
688 | addr = formataddr((spr.creator.displayname, |
689 | spr.creator.preferredemail.email)) |
690 | changedby_displayname = sanitize_string(addr) |
691 | + changedby_email = spr.creator.preferredemail.email |
692 | if maintainer: |
693 | addr = formataddr((spr.maintainer.displayname, |
694 | spr.maintainer.preferredemail.email)) |
695 | maintainer_displayname = sanitize_string(addr) |
696 | + maintainer_email = spr.maintainer.preferredemail.email |
697 | else: |
698 | changesfile = date = None |
699 | |
700 | @@ -643,8 +649,10 @@ |
701 | 'date': date, |
702 | 'changedby': changedby, |
703 | 'changedby_displayname': changedby_displayname, |
704 | + 'changedby_email': changedby_email, |
705 | 'maintainer': maintainer, |
706 | 'maintainer_displayname': maintainer_displayname, |
707 | + 'maintainer_email': maintainer_email, |
708 | } |
709 | |
710 |