Merge lp:~openerp-dev/openobject-server/6.0-bug-732512-xrg into lp:openobject-server/6.0

Proposed by xrg
Status: Needs review
Proposed branch: lp:~openerp-dev/openobject-server/6.0-bug-732512-xrg
Merge into: lp:openobject-server/6.0
Diff against target: 200 lines (+91/-25)
1 file modified
bin/tools/misc.py (+91/-25)
To merge this branch: bzr merge lp:~openerp-dev/openobject-server/6.0-bug-732512-xrg
Reviewer Review Type Date Requested Status
OpenERP buildbot (community) Approve
Review via email: mp+52827@code.launchpad.net
To post a comment you must log in.
Revision history for this message
OpenERP buildbot (openerp-buildbot) :
review: Approve

Unmerged revisions

3363. By xrg

tools/misc: try harder to encode smtp address headers

The problem is that encoding the full header (a comma-separated list of
email addresses) in one =?c?...?= chunk is not properly handled by
some mail transport agents[1]. Even worse, when parts cannot fit
into one 76-char line (note that utf-8 over base64 tends to be rather
long), Postfix thinks each line is another address, which it then decides
to trivial-rewrite, appending the domain name etc.
In another case we noticed, invalid (not properly encoded) chars or half-
addresses were adding to the spam score of the mails.

Solution: according to RFC 2047, we can have multiple encoded-words
(the =?.?..?= part) as any whitespace-delimited part of the header.
We /cannot/ have them inside quoted strings like "=?..?=", but need
to include the quotes inside the encoded-word.
The patch splits the address tokens into their smallest allowed "words"
and then tries to encode them one by one. Hopefully, only names will need
the encoding, while emails will be pure us-ascii and will be appended
verbatim. It also includes support for one 'hint' encoding (iso8859-7 in
my case, for example) that would have a shorter representation than utf-8.
This saves the day for some long names.

Note: a side-effect is that we add one space /before/ the comma when
we separate multiple addresses, like ' "Me" <me@here> , "Him" <him@there>'.
This should be tolerated by the protocol, AFAICT.

[1] I've experienced this throughout the years we've been sending Greek
names in our From/To addresses. My maildirs are full of examples.
(cherry picked from commit 7fb0827e94b50b88d003c7c0b37abd918eed98ec)

3362. By xrg

misc/email: improve, utf-8 aware headers, better logging.

Some trivial improvements to email_send():
 - use the standard Python logging module
 - have one debug() line and one info() line before and after sending
   a message, respectively
 - if the message is plain, send a simple email instead of a multipart one
 - if any header is not pure us-ascii, encode it with email.Header()
 - on the other hand, if the text is pure us-ascii, don't encode it.
(cherry picked from commit 9c025f545a086e80c2258c35abada8084ccabf2f)

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'bin/tools/misc.py'
2--- bin/tools/misc.py 2011-01-20 12:40:20 +0000
3+++ bin/tools/misc.py 2011-03-10 11:32:35 +0000
4@@ -43,6 +43,7 @@
5 from email.Header import Header
6 from email.Utils import formatdate, COMMASPACE
7 from email import Encoders
8+from email import Charset
9 from itertools import islice, izip
10 from lxml import etree
11 from which import which
12@@ -432,12 +433,14 @@
13 :return: True if the mail was delivered successfully to the smtp,
14 else False (+ exception logged)
15 """
16+ logger = logging.getLogger('email_send')
17+
18 class WriteToLogger(object):
19- def __init__(self):
20- self.logger = netsvc.Logger()
21+ def __init__(self, logger):
22+ self.logger = logger
23
24 def write(self, s):
25- self.logger.notifyChannel('email_send', netsvc.LOG_DEBUG, s)
26+ self.logger.debug(s)
27
28 if openobject_id:
29 message['Message-Id'] = generate_tracking_message_id(openobject_id)
30@@ -448,19 +451,22 @@
31 if smtp_server.startswith('maildir:/'):
32 from mailbox import Maildir
33 maildir_path = smtp_server[8:]
34- mdir = Maildir(maildir_path,factory=None, create = True)
35+ mdir = Maildir(maildir_path, factory=None, create = True)
36 mdir.add(message.as_string(True))
37+ logger.info("1 message to %d recepients saved at %s. Dry run.", len(smtp_to_list), maildir_path)
38 return True
39
40 oldstderr = smtplib.stderr
41- if not ssl: ssl = config.get('smtp_ssl', False)
42+ ssl = ssl or config.get('smtp_ssl', False)
43 s = smtplib.SMTP()
44 try:
45 # in case of debug, the messages are printed to stderr.
46 if debug:
47- smtplib.stderr = WriteToLogger()
48+ smtplib.stderr = WriteToLogger(logger)
49
50+ logger.debug("Sending Message to %s through %s", ','.join(smtp_to_list), smtp_server)
51 s.set_debuglevel(int(bool(debug))) # 0 or 1
52+
53 s.connect(smtp_server, config['smtp_port'])
54 if ssl:
55 s.ehlo()
56@@ -471,6 +477,7 @@
57 s.login(config['smtp_user'], config['smtp_password'])
58
59 s.sendmail(smtp_from, smtp_to_list, message.as_string())
60+ logger.info("1 message sent to %d recepients through %s", len(smtp_to_list), smtp_server)
61 finally:
62 try:
63 s.quit()
64@@ -481,7 +488,7 @@
65 pass
66
67 except Exception:
68- _logger.error('could not deliver email', exc_info=True)
69+ logger.error('could not deliver email', exc_info=True)
70 return False
71
72 return True
73@@ -503,50 +510,109 @@
74 if x_headers is None:
75 x_headers = {}
76
77+ def Header_Encoded(hstr):
78+ """Format hstr for an email header, possibly through utf8 encoding
79+ """
80+
81+ if not isinstance(hstr, unicode):
82+ hstr = ustr(hstr)
83+
84+ try:
85+ return hstr.encode('us-ascii')
86+ except UnicodeError:
87+ return Header(hstr, 'utf-8')
88+
89+ email_adre = re.compile(r'^((?:"[^"]+?")|(?:[^,<]+?)|\A)\s*<([\w\.-~]+?(?:@[\w\.-~]+)?)>$')
90+ email_charset = config.get_misc('smtp', 'charset', None)
91+
92+ def Address_Encoded(inp, header_name=None):
93+ """ Encode the inp address into a valid RFC 2047 header
94+ The problem is, that some mail transports (like Postfix <= 2.5) don't
95+ like all the header encoded into one string (that spans lines) and the
96+ protocol doesn't provide a line-continuation mark around that case.
97+ So, we have to split tokens and encode them separately.
98+ """
99+ if isinstance(inp, basestring):
100+ inp = [inp,]
101+
102+ tokens = []
103+ for itok in inp:
104+ if not isinstance(itok, unicode):
105+ itok = ustr(itok)
106+ itok = itok.strip()
107+ if not itok: # skip empty ones
108+ continue
109+ if tokens:
110+ tokens.append(',')
111+ m = email_adre.match(itok)
112+ if m:
113+ if m.group(1):
114+ tokens.append(m.group(1))
115+ tokens.append('<' + m.group(2) + '>')
116+ else:
117+ tokens.append(itok)
118+
119+ # Now, encode them one by one
120+ ret = Header(header_name=header_name)
121+ for t in tokens:
122+ # this method will try 'us-ascii', email_charset and 'utf-8'
123+ ret.append(t, charset=email_charset)
124+
125+ return ret
126
127 if not (email_from or config['email_from']):
128 raise ValueError("Sending an email requires either providing a sender "
129 "address or having configured one")
130
131 if not email_from: email_from = config.get('email_from', False)
132- email_from = ustr(email_from).encode('utf-8')
133
134 if not email_cc: email_cc = []
135 if not email_bcc: email_bcc = []
136- if not body: body = u''
137-
138- email_body = ustr(body).encode('utf-8')
139- email_text = MIMEText(email_body or '',_subtype=subtype,_charset='utf-8')
140-
141- msg = MIMEMultipart()
142-
143- msg['Subject'] = Header(ustr(subject), 'utf-8')
144- msg['From'] = email_from
145+ body_charset = 'us-ascii'
146+ if not body:
147+ body = u''
148+ else:
149+ body = ustr(body)
150+ try:
151+ txt_body = body.encode('us-ascii')
152+ except UnicodeError:
153+ txt_body = body.encode('utf-8')
154+ body_charset = 'utf-8'
155+
156+ email_text = MIMEText(txt_body,_subtype=subtype,_charset=body_charset)
157+
158+ if attach or (html2text and subtype == 'html'):
159+ msg = MIMEMultipart()
160+ else:
161+ msg = email_text
162+
163+ msg['Subject'] = Header_Encoded(subject)
164+ msg['From'] = Address_Encoded(email_from, 'From')
165 del msg['Reply-To']
166 if reply_to:
167- msg['Reply-To'] = reply_to
168+ msg['Reply-To'] = Address_Encoded(reply_to, 'Reply-To')
169 else:
170 msg['Reply-To'] = msg['From']
171- msg['To'] = COMMASPACE.join(email_to)
172+ msg['To'] = Address_Encoded(email_to, 'To')
173 if email_cc:
174- msg['Cc'] = COMMASPACE.join(email_cc)
175+ msg['Cc'] = Address_Encoded(email_cc, 'Cc')
176 if email_bcc:
177- msg['Bcc'] = COMMASPACE.join(email_bcc)
178+ msg['Bcc'] = Address_Encoded(email_bcc, 'Bcc')
179 msg['Date'] = formatdate(localtime=True)
180
181 msg['X-Priority'] = priorities.get(priority, '3 (Normal)')
182
183 # Add dynamic X Header
184 for key, value in x_headers.iteritems():
185- msg['%s' % key] = str(value)
186+ msg['%s' % key] = Header_Encoded(value)
187
188 if html2text and subtype == 'html':
189- text = html2text(email_body.decode('utf-8')).encode('utf-8')
190+ text = html2text(body).encode(body_charset)
191 alternative_part = MIMEMultipart(_subtype="alternative")
192- alternative_part.attach(MIMEText(text, _charset='utf-8', _subtype='plain'))
193+ alternative_part.attach(MIMEText(text, _charset=body_charset, _subtype='plain'))
194 alternative_part.attach(email_text)
195 msg.attach(alternative_part)
196- else:
197+ elif msg is not email_text:
198 msg.attach(email_text)
199
200 if attach: