Merge lp:~wgrant/launchpad/improved-html_escape into lp:launchpad

Proposed by William Grant
Status: Merged
Approved by: Steve Kowalik
Approved revision: no longer in the source branch.
Merged at revision: 16353
Proposed branch: lp:~wgrant/launchpad/improved-html_escape
Merge into: lp:launchpad
Diff against target: 187 lines (+80/-48)
3 files modified
lib/lp/services/webapp/doc/menus.txt (+0/-20)
lib/lp/services/webapp/escaping.py (+19/-28)
lib/lp/services/webapp/tests/test_escaping.py (+61/-0)
To merge this branch: bzr merge lp:~wgrant/launchpad/improved-html_escape
Reviewer Review Type Date Requested Status
Steve Kowalik (community) code Approve
Review via email: mp+138902@code.launchpad.net

Commit message

Rewrite html_escape() and structured() and make them escape quotes in preparation for their deployment everywhere.

Description of the change

Rewrite html_escape and structured to be a bit more sensible and shorter. html_escape will now also escape single and double quotes. I rewrote the tests as well.

To post a comment you must log in.
Revision history for this message
Steve Kowalik (stevenk) :
review: Approve (code)

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'lib/lp/services/webapp/doc/menus.txt'
2--- lib/lp/services/webapp/doc/menus.txt 2012-06-15 13:45:02 +0000
3+++ lib/lp/services/webapp/doc/menus.txt 2012-12-10 04:21:20 +0000
4@@ -318,26 +318,6 @@
5 >>> IFacetLink(link).escapedtext
6 u'some <b> --&gt; </b> text'
7
8-We can test the 'structured' helper directly.
9-
10- >>> structured('foo').escapedtext
11- u'foo'
12-
13- >>> structured('%s', 'a < b > c & d " e').escapedtext
14- u'a &lt; b &gt; c &amp; d " e'
15-
16- >>> structured('a < b > c & d " e').escapedtext
17- u'a < b > c & d " e'
18-
19- >>> structured(
20- ... 'replacement %(foo)s, %(bar)s', foo='<foo>', bar='bar').escapedtext
21- u'replacement &lt;foo&gt;, bar'
22-
23- >>> structured('whatever', 'foo', 'bar', baz='baz')
24- Traceback (most recent call last):
25- ...
26- TypeError:...
27-
28 Next, we return the link as HTML.
29
30 # We need to use a real launchpad test request so the view adapter
31
32=== modified file 'lib/lp/services/webapp/escaping.py'
33--- lib/lp/services/webapp/escaping.py 2012-11-26 08:48:03 +0000
34+++ lib/lp/services/webapp/escaping.py 2012-12-10 04:21:20 +0000
35@@ -7,8 +7,6 @@
36 'structured',
37 ]
38
39-import cgi
40-
41 from zope.i18n import (
42 Message,
43 translate,
44@@ -19,6 +17,11 @@
45 from lp.services.webapp.publisher import get_current_browser_request
46
47
48+HTML_REPLACEMENTS = (
49+ ('&', '&amp;'), ('<', '&lt;'), ('>', '&gt;'), ('"', '&quot;'),
50+ ("'", '&#x27;'))
51+
52+
53 def html_escape(message):
54 """Performs translation and sanitizes any HTML present in the message.
55
56@@ -38,9 +41,10 @@
57 # It is possible that the message is wrapped in an
58 # internationalized object, so we need to translate it
59 # first. See bug #54987.
60- return cgi.escape(
61- unicode(
62- translate_if_i18n(message)))
63+ raw = unicode(translate_if_i18n(message))
64+ for needle, replacement in HTML_REPLACEMENTS:
65+ raw = raw.replace(needle, replacement)
66+ return raw
67
68
69 def translate_if_i18n(obj_or_msgid):
70@@ -49,9 +53,7 @@
71 Returns any other type of object untouched.
72 """
73 if isinstance(obj_or_msgid, Message):
74- return translate(
75- obj_or_msgid,
76- context=get_current_browser_request())
77+ return translate(obj_or_msgid, context=get_current_browser_request())
78 else:
79 # Just text (or something unknown).
80 return obj_or_msgid
81@@ -61,31 +63,20 @@
82
83 implements(IStructuredString)
84
85- def __init__(self, text, *replacements, **kwreplacements):
86- text = translate_if_i18n(text)
87+ def __init__(self, text, *reps, **kwreps):
88+ text = unicode(translate_if_i18n(text))
89 self.text = text
90- if replacements and kwreplacements:
91+ if reps and kwreps:
92 raise TypeError(
93 "You must provide either positional arguments or keyword "
94 "arguments to structured(), not both.")
95- if replacements:
96- escaped = []
97- for replacement in replacements:
98- if isinstance(replacement, structured):
99- escaped.append(unicode(replacement.escapedtext))
100- else:
101- escaped.append(cgi.escape(unicode(replacement)))
102- self.escapedtext = text % tuple(escaped)
103- elif kwreplacements:
104- escaped = {}
105- for key, value in kwreplacements.iteritems():
106- if isinstance(value, structured):
107- escaped[key] = unicode(value.escapedtext)
108- else:
109- escaped[key] = cgi.escape(unicode(value))
110- self.escapedtext = text % escaped
111+ if reps:
112+ escaped = tuple(html_escape(rep) for rep in reps)
113+ elif kwreps:
114+ escaped = dict((k, html_escape(v)) for k, v in kwreps.iteritems())
115 else:
116- self.escapedtext = unicode(text)
117+ escaped = ()
118+ self.escapedtext = text % escaped
119
120 def __repr__(self):
121 return "<structured-string '%s'>" % self.text
122
123=== added file 'lib/lp/services/webapp/tests/test_escaping.py'
124--- lib/lp/services/webapp/tests/test_escaping.py 1970-01-01 00:00:00 +0000
125+++ lib/lp/services/webapp/tests/test_escaping.py 2012-12-10 04:21:20 +0000
126@@ -0,0 +1,61 @@
127+# Copyright 2012 Canonical Ltd. This software is licensed under the
128+# GNU Affero General Public License version 3 (see the file LICENSE).
129+
130+__metaclass__ = type
131+
132+from lp.services.webapp.escaping import (
133+ html_escape,
134+ structured,
135+ )
136+from lp.testing import TestCase
137+
138+
139+class TestHtmlEscape(TestCase):
140+
141+ def test_escapes_special_characters(self):
142+ # &, <, >, " and ' are transformed into equivalent entity
143+ # references, as they hold special significance in HTML.
144+ self.assertEqual('&amp;&lt;&gt;&quot;&#x27;', html_escape('&<>"\''))
145+
146+ def test_structured_passed_through(self):
147+ # An IStructuredString escapes to the value of its .escapedtext.
148+ struct = structured('<b>%s</b>', '<i>It works!</i>')
149+ self.assertEqual(
150+ '<b>&lt;i&gt;It works!&lt;/i&gt;</b>', html_escape(struct))
151+
152+
153+class TestStructured(TestCase):
154+
155+ def test_escapes_args(self):
156+ # Normal string arguments are escaped before they're inserted
157+ # into the formatted string.
158+ struct = structured(
159+ '<b>%s</b> %s', 'I am <i>escaped</i>!', '"& I\'m too."')
160+ self.assertEqual(
161+ '<b>I am &lt;i&gt;escaped&lt;/i&gt;!</b> '
162+ '&quot;&amp; I&#x27;m too.&quot;',
163+ struct.escapedtext)
164+
165+ def test_structured_args_passed_through(self):
166+ # If an IStructuredString is used as an argument, its
167+ # .escapedtext is included verbatim. Other arguments are still
168+ # escaped.
169+ inner = structured('<b>%s</b>', '<i>some text</i>')
170+ outer = structured('<li>%s: %s</li>', 'First & last', inner)
171+ self.assertEqual(
172+ '<li>First &amp; last: <b>&lt;i&gt;some text&lt;/i&gt;</b></li>',
173+ outer.escapedtext)
174+
175+ def test_kwargs(self):
176+ # Keyword args work too.
177+ inner = structured('<b>%s</b>', '<i>some text</i>')
178+ outer = structured(
179+ '<li>%(capt)s: %(body)s</li>', capt='First & last', body=inner)
180+ self.assertEqual(
181+ '<li>First &amp; last: <b>&lt;i&gt;some text&lt;/i&gt;</b></li>',
182+ outer.escapedtext)
183+
184+ def test_mixing_kwargs_is_illegal(self):
185+ # Passing a combination of args and kwargs is illegal.
186+ self.assertRaises(
187+ TypeError, structured, '%s %(foo)s', 'bar', foo='foo')