Merge lp:~tkluck/postler/contact-parsing into lp:postler

Proposed by Timo Kluck
Status: Needs review
Proposed branch: lp:~tkluck/postler/contact-parsing
Merge into: lp:postler
Diff against target: 356 lines (+137/-99)
7 files modified
postler/postler-bureau.vala (+1/-1)
postler/postler-contact.vala (+55/-23)
postler/postler-content.vala (+2/-2)
postler/postler-index.vala (+1/-1)
postler/postler-message.vala (+23/-14)
postler/postler-messages.vala (+54/-57)
postler/postler-recipiententry.vala (+1/-1)
To merge this branch: bzr merge lp:~tkluck/postler/contact-parsing
Reviewer Review Type Date Requested Status
Cris Dywan Pending
Review via email: mp+67378@code.launchpad.net

Description of the change

This branch should fix 3 bugs:

 * Bug #804018: Difficulties with recipient parsing
 * Bug #806450: multi-line fields are not parsed correctly
 * An unreported bug where parsing encoded text can hang inside g_utf8_offset_to_pointer (), rendering the Postler service unresponsive.

I cannot reproduce the last bug anymore, so I guess it only happened with invalid encoded text (which may have resulted from the previous bug). Still, my new implementation using regular expressions is simpler and therefore hopefully more robust. Please review.

To post a comment you must log in.
lp:~tkluck/postler/contact-parsing updated
901. By Timo Kluck

merge trunk

902. By Timo Kluck

merge trunk

903. By Timo Kluck

merge trunk

904. By Timo Kluck

merge trunk

905. By Timo Kluck

merge trunk

906. By Timo Kluck

merge trunk

907. By Timo Kluck

merge trunk

908. By Timo Kluck

merge trunk

909. By Timo Kluck

merge trunk

Unmerged revisions

909. By Timo Kluck

merge trunk

908. By Timo Kluck

merge trunk

907. By Timo Kluck

merge trunk

906. By Timo Kluck

merge trunk

905. By Timo Kluck

merge trunk

904. By Timo Kluck

merge trunk

903. By Timo Kluck

merge trunk

902. By Timo Kluck

merge trunk

901. By Timo Kluck

merge trunk

900. By Timo Kluck

 * no cuddled brackets
 * fix build warnings
 * fix bug where header parsing did not stop at the empty line

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'postler/postler-bureau.vala'
2--- postler/postler-bureau.vala 2011-08-04 01:18:58 +0000
3+++ postler/postler-bureau.vala 2011-08-04 15:00:38 +0000
4@@ -968,7 +968,7 @@
5 string body = (
6 "From: Zeus\nTo: You, Hera <a@a>, Ares <a@a>, Aphrodite <a@a>,"
7 + "Artemis <a@a>, Hephaistos <a@a>, Poseidon <a@a>, Dionisos <a@a>,"
8- + "Apollo <a@a>, Hermes <a@a>, Athena <a@a>, Demeter <a@@>, "
9+ + "Apollo <a@a>, \"Hermes, Mercury\" <a@a>, Athena <a@a>, Demeter <a@@>, "
10 + "Hestia <a@a>\nCC: Hades <a@a>, Herkules <a@a>, Uranus <a@a>, Nyx <a@a>\n"
11 + "Subject: Regarding nudity on the Olympus\n"
12 + "Date: Fri, 28 May 2010 23:27:35 +0200\n"
13
14=== modified file 'postler/postler-contact.vala'
15--- postler/postler-contact.vala 2011-06-29 18:00:59 +0000
16+++ postler/postler-contact.vala 2011-08-04 15:00:38 +0000
17@@ -13,6 +13,7 @@
18 public class Contact {
19 public string display_name;
20 public GLib.File? avatar;
21+ static Regex contact_regex = null;
22
23 public Contact (string display_name, GLib.File? avatar) {
24 this.display_name = display_name;
25@@ -25,30 +26,33 @@
26 GLib.critical ("parse: assertion '!address.length < 1' failed");
27 return { address, address };
28 }
29-
30- if (!(">" in address && "<" in address))
31+ if(contact_regex == null) {
32+ /* taken from http://www.regular-expressions.info/email.html ; added possibility of "mailto:" */
33+ var valid_email_address_regex = """(mailto:)?(?<EMAIL>[A-Z0-9._%+-]+@[A-Z0-9.-]+\.[A-Z]{2,4})""";
34+ var possibly_invalid_email_address_regex = """(mailto:)?(?<EMAIL>.+)""";
35+ try {
36+ contact_regex = new Regex("""^\s*((("(?<NAME>.*)"|'(?<NAME>.*)'|(?<NAME>.*)))?\s*<%s>|%s)\s*$"""
37+ .printf (valid_email_address_regex, possibly_invalid_email_address_regex),
38+ GLib.RegexCompileFlags.CASELESS|GLib.RegexCompileFlags.DUPNAMES);
39+ }
40+ catch (GLib.RegexError e) {
41+ GLib.critical ("parse: could not parse regular expression");
42+ return { address, address };
43+ }
44+ }
45+ MatchInfo mi;
46+ if(contact_regex.match (address, 0, out mi)) {
47+ var email = mi.fetch_named ("EMAIL");
48+ if (email == null || email == "") email = address;
49+ var name = mi.fetch_named ("NAME");
50+ if (name == null || name == "") name = email;
51+ name = name.replace ("\\\"", "`");
52+ return { name , email };
53+ }
54+ else {
55+ GLib.warning ("parse: could not parse address %s", address);
56 return { address, address };
57-
58- long greater = address.length - 1;
59- while (address[greater] != '>')
60- greater--;
61- long lower = greater;
62- while (address[lower] != '<')
63- lower--;
64-
65- string recipient = address.slice (lower + 1, greater);
66- if (recipient.has_prefix ("mailto:"))
67- recipient = recipient.substring (7, -1);
68- if (lower == 0)
69- return { recipient, recipient };
70- if (">" in recipient)
71- return { recipient, recipient };
72-
73- /* Remove double or single quotes around the name */
74- long first = address.has_prefix ("'") || address.has_prefix ("\"") ? 1 : 0;
75- return { address.substring (first, lower - 1)
76- .replace ("\\\"", "`")
77- .replace ("' ", "").replace ("\"", "").chomp (), recipient };
78+ }
79 }
80
81 public static string address_from_string (string contact) {
82@@ -69,6 +73,34 @@
83 return true;
84 return false;
85 }
86+
87+ public static string[] quoted_split (string? comma_separated_list) {
88+ var retval = new string[0];
89+ if (comma_separated_list != null) {
90+ var cur = "";
91+ foreach (var s in comma_separated_list.split (",")) {
92+ if (cur == "") {
93+ cur = s;
94+ }
95+ else {
96+ cur += "," + s;
97+ }
98+ bool quot_open = false;
99+ for (var tail = cur.chr (-1, '"'); tail != null; tail = tail[1:tail.length].chr (-1, '"')) {
100+ quot_open = !quot_open;
101+ }
102+ if (!quot_open && cur != "") {
103+ retval += cur;
104+ cur = "";
105+ }
106+ }
107+ if (cur != "") {
108+ retval += cur;
109+ }
110+ }
111+ return retval;
112+ }
113+
114 }
115 }
116
117
118=== modified file 'postler/postler-content.vala'
119--- postler/postler-content.vala 2011-08-04 01:18:58 +0000
120+++ postler/postler-content.vala 2011-08-04 15:00:38 +0000
121@@ -217,7 +217,7 @@
122 string arguments = parts[1] != null ? ("?" + parts[1]) : "";
123
124 int count = 0;
125- foreach (string address in addresses.split (",")) {
126+ foreach (string address in Contact.quoted_split (addresses)) {
127 if (address == "")
128 continue;
129 if (!address.contains ("@")) {
130@@ -771,7 +771,7 @@
131 reply_markup = outgoing_template.replace ("%message%", reply_markup);
132 else
133 reply_markup = content_template.replace ("%message%", reply_markup);
134- reply_markup = reply_markup.replace ("\"%sender%\"", ("\"" + child.sender + "\""));
135+ reply_markup = reply_markup.replace ("\"%sender%\"", ("\"" + html_escape (child.sender) + "\""));
136 /* Do inherit colors, to match Adium and Empathy */
137 reply_markup = reply_markup.replace ("%sender%", linkify_address (child.sender, true));
138 reply_markup = reply_markup.replace ("%time{%X}%", format_date (child.date));
139
140=== modified file 'postler/postler-index.vala'
141--- postler/postler-index.vala 2011-07-16 23:34:19 +0000
142+++ postler/postler-index.vala 2011-08-04 15:00:38 +0000
143@@ -385,7 +385,7 @@
144 try {
145 var contact = guess_name (address);
146 if (contact != null) {
147- message.sender = "%s <%s>".printf (contact.display_name, address);
148+ message.sender = "\"%s\" <%s>".printf (contact.display_name, address);
149 message.avatar = contact.avatar;
150 }
151 }
152
153=== modified file 'postler/postler-message.vala'
154--- postler/postler-message.vala 2011-08-04 01:18:57 +0000
155+++ postler/postler-message.vala 2011-08-04 15:00:38 +0000
156@@ -293,15 +293,16 @@
157 void read_from_stream (GLib.DataInputStream stream,
158 GLib.Cancellable? cancellable = null) throws GLib.Error {
159
160+ this.recipients = null;
161+
162 this.stream = stream;
163- string line;
164- string previous_line = "";
165- while ((line = stream.read_line (null, cancellable)) != null) {
166- if (line == "")
167- break;
168- if (line[0] == '\t' || line[0] == ' ')
169- line = previous_line + " " + line.chug ();
170- previous_line = line;
171+ string line = stream.read_line (null, cancellable);
172+ string next_line = "";
173+ while (line != null && line != "" && (next_line = stream.read_line (null, cancellable)) != null) {
174+ if (next_line[0] == '\t' || next_line[0] == ' ') {
175+ line = line + " " + next_line.chug ();
176+ continue;
177+ }
178
179 string[] parts = line.split (":", 2);
180 if (parts == null || parts[0] == null)
181@@ -403,6 +404,8 @@
182 }
183 else if (field == "content-type" || field == "content-transfer-encoding")
184 fields.insert (field, parts[1]);
185+
186+ line = next_line;
187 }
188
189 /* Treat top-levels like replies to themselves */
190@@ -416,12 +419,18 @@
191 Also Reply-To may equal From, which is at best confusing. */
192 if (reply_to != null) {
193 string canonical = Postler.Contact.address_from_string (reply_to);
194- if (Postler.Contact.equal (canonical, get_field ("to")))
195- reply_to = null;
196- else if (Postler.Contact.equal (canonical, get_field ("list-post")))
197- reply_to = null;
198- else if (Postler.Contact.equal (canonical, sender))
199- reply_to = null;
200+ foreach(var r in Postler.Contact.quoted_split (recipients)) {
201+ if (Postler.Contact.equal (canonical, r)) {
202+ reply_to = null;
203+ break;
204+ }
205+ }
206+ if (Postler.Contact.equal (canonical, get_field ("list-post"))) {
207+ reply_to = null;
208+ }
209+ else if (Postler.Contact.equal (canonical, sender)) {
210+ reply_to = null;
211+ }
212 }
213
214 /* noreply@, noreply-12345@ or foobar-noreply@ */
215
216=== modified file 'postler/postler-messages.vala'
217--- postler/postler-messages.vala 2011-07-29 19:36:16 +0000
218+++ postler/postler-messages.vala 2011-08-04 15:00:38 +0000
219@@ -269,70 +269,67 @@
220 delete_checked ();
221 }
222
223- static string decode_piece (string encoded, out string charset) {
224- if (!encoded.contains ("=?"))
225- return encoded;
226- int token1 = 0;
227- while (!(encoded[token1] == '=' && encoded[token1 + 1] == '?'))
228- token1++;
229- token1++;
230- int token = token1 + 1;
231- while (encoded[token] != '?')
232- token++;
233- charset = encoded[token1 + 1:token].up ();
234- /* Encoding aliases */
235- if (charset == "KS_C_5601-1987")
236- charset = "CP949";
237-
238- token++;
239- unichar encoding = encoded[token].toupper ();
240- if (encoding != 'Q' && encoding != 'B')
241- return encoded;
242- token++;
243- if (encoded[token] != '?')
244- return encoded;
245- token++;
246- string[] pieces = encoded.slice (token, encoded.length).split ("?=");
247- if (pieces == null || pieces[0] == null)
248- return encoded;
249- string unquoted;
250- if (encoding == 'Q') {
251- unquoted = pieces[0].replace (" =", " ").replace ("_", " ");
252- unquoted = Postler.Message.quoted_printable_decode (unquoted);
253- }
254- else if (encoding == 'B')
255- unquoted = (string)GLib.Base64.decode (pieces[0]);
256- else
257- unquoted = pieces[0];
258- try {
259- return encoded.substring (0, token1 - 1)
260- + GLib.convert (unquoted, -1, "UTF-8", charset, null) +
261- (pieces[1] != null ? pieces[1] : "");
262- }
263- catch (GLib.ConvertError error) {
264- GLib.warning (_("Failed to convert \"%s\": %s"), encoded, error.message);
265- return encoded.substring (0, token1 - 1)
266- + pieces[0] + (pieces[1] != null ? pieces[1] : "");
267- }
268- }
269-
270 internal static string parse_encoded (string? encoded, out string charset) {
271 charset = null;
272 return_val_if_fail (encoded != null, null);
273
274 /* format "=?charset?encoding?encoded?=",
275 if in doubt, bail out and take the raw data */
276- /* We mask "?q?=" as "\nq\n=" here because ?= is our delimiter */
277- string eencoded = encoded.replace ("?q?=", "\nq\n=").replace ("?Q?=", "\nQ\n=");
278- string[] pieces = eencoded.strip ().split ("?=");
279- if (pieces == null || pieces[0] == null)
280- return encoded;
281-
282 var decoded = new GLib.StringBuilder ();
283- foreach (string piece in pieces) {
284- piece = piece.replace ("\nq\n=", "?q?=").replace ("\nQ\n=", "?Q?=");
285- decoded.append (decode_piece (piece, out charset));
286- }
287+ Regex regex;
288+ try {
289+ regex = new Regex("""((?<=\?=)|^)(?P<UNENCODED>.*?)(=\?(?P<CHARSET>[^?]*)\?(?P<ENCODING>[qQbB])\?(?P<ENCODED>.*?)\?=|$)""",
290+ GLib.RegexCompileFlags.DOTALL,0);
291+ }
292+ catch (GLib.RegexError e) {
293+ GLib.critical ("parse_encoded: could not parse regular expression");
294+ return encoded;
295+ }
296+
297+ MatchInfo mi;
298+ if ( !regex.match (encoded, 0, out mi)) {
299+ return encoded;
300+ }
301+
302+ try {
303+ do {
304+ var unencoded = mi.fetch_named ("UNENCODED") ?? "";
305+ charset = (mi.fetch_named ("CHARSET") ?? "").up ();
306+ var encoding = (mi.fetch_named ("ENCODING") ?? "").up ();
307+ var encoded_text = mi.fetch_named ("ENCODED") ?? "";
308+
309+ if(charset == "" || encoding == "") {
310+ decoded.append (unencoded);
311+ continue;
312+ }
313+
314+ /* Encoding aliases */
315+ if (charset == "KS_C_5601-1987")
316+ charset = "CP949";
317+
318+ string unquoted;
319+ if (encoding == "Q") {
320+ unquoted = encoded_text.replace (" =", " ").replace ("_", " ");
321+ unquoted = Postler.Message.quoted_printable_decode (unquoted);
322+ }
323+ else if (encoding == "B")
324+ unquoted = (string)GLib.Base64.decode (encoded_text);
325+ else
326+ unquoted = encoded_text;
327+ try {
328+ decoded.append (unencoded + GLib.convert (unquoted, -1, "UTF-8", charset, null));
329+ }
330+ catch (GLib.ConvertError error) {
331+ GLib.warning (_("Failed to convert \"%s\": %s"), encoded, error.message);
332+ decoded.append (unencoded + encoded_text);
333+ }
334+ } while (mi.next ());
335+ }
336+ catch (GLib.RegexError e) {
337+ GLib.warning ("parse: could not parse encoded string starting with %s", encoded[0:40]);
338+ return encoded;
339+ }
340+
341 return fallback_to_utf8 (decoded.str);
342 }
343
344
345=== modified file 'postler/postler-recipiententry.vala'
346--- postler/postler-recipiententry.vala 2011-06-23 18:59:06 +0000
347+++ postler/postler-recipiententry.vala 2011-08-04 15:00:38 +0000
348@@ -139,7 +139,7 @@
349
350 void buttonize_text () {
351 if (entry.text.chr (-1, '@') != null) {
352- string[] addresses = entry.text.split (",");
353+ string[] addresses = Contact.quoted_split (entry.text);
354 entry.text = "";
355 foreach (string address in addresses)
356 add_address (address);

Subscribers

People subscribed via source and target branches

to status/vote changes: