Merge lp:~mitya57/ubuntu/vivid/beautifulsoup4/tests into lp:ubuntu/vivid/beautifulsoup4

Proposed by Dmitry Shachnev
Status: Merged
Merged at revision: 17
Proposed branch: lp:~mitya57/ubuntu/vivid/beautifulsoup4/tests
Merge into: lp:ubuntu/vivid/beautifulsoup4
Diff against target: 511 lines (+465/-2)
6 files modified
.pc/applied-patches (+1/-0)
.pc/fix-chardet-failure/bs4/tests/test_soup.py (+434/-0)
bs4/tests/test_soup.py (+2/-2)
debian/changelog (+6/-0)
debian/patches/fix-chardet-failure (+21/-0)
debian/patches/series (+1/-0)
To merge this branch: bzr merge lp:~mitya57/ubuntu/vivid/beautifulsoup4/tests
Reviewer Review Type Date Requested Status
Stefano Rivera Pending
Ubuntu branches Pending
Review via email: mp+241778@code.launchpad.net

Description of the change

Currently the autopkgtests fail on Jenkins; this branch fixes them.

This only happens when python3-chardet is installed. It does not happen on Debian's CI because python3-chardet is only a recommendation, not a dependency.

Submitted an MP against the upstream branch as well, with an explanation: https://code.launchpad.net/~mitya57/beautifulsoup/tests/+merge/241832.

To post a comment you must log in.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file '.pc/applied-patches'
2--- .pc/applied-patches 2014-10-16 22:57:41 +0000
3+++ .pc/applied-patches 2014-11-14 17:22:47 +0000
4@@ -1,1 +1,2 @@
5 python3.4-warnings
6+fix-chardet-failure
7
8=== added directory '.pc/fix-chardet-failure'
9=== added directory '.pc/fix-chardet-failure/bs4'
10=== added directory '.pc/fix-chardet-failure/bs4/tests'
11=== added file '.pc/fix-chardet-failure/bs4/tests/test_soup.py'
12--- .pc/fix-chardet-failure/bs4/tests/test_soup.py 1970-01-01 00:00:00 +0000
13+++ .pc/fix-chardet-failure/bs4/tests/test_soup.py 2014-11-14 17:22:47 +0000
14@@ -0,0 +1,434 @@
15+# -*- coding: utf-8 -*-
16+"""Tests of Beautiful Soup as a whole."""
17+
18+import logging
19+import unittest
20+import sys
21+import tempfile
22+
23+from bs4 import (
24+ BeautifulSoup,
25+ BeautifulStoneSoup,
26+)
27+from bs4.element import (
28+ CharsetMetaAttributeValue,
29+ ContentMetaAttributeValue,
30+ SoupStrainer,
31+ NamespacedAttribute,
32+ )
33+import bs4.dammit
34+from bs4.dammit import (
35+ EntitySubstitution,
36+ UnicodeDammit,
37+)
38+from bs4.testing import (
39+ SoupTest,
40+ skipIf,
41+)
42+import warnings
43+
44+try:
45+ from bs4.builder import LXMLTreeBuilder, LXMLTreeBuilderForXML
46+ LXML_PRESENT = True
47+except ImportError, e:
48+ LXML_PRESENT = False
49+
50+PYTHON_2_PRE_2_7 = (sys.version_info < (2,7))
51+PYTHON_3_PRE_3_2 = (sys.version_info[0] == 3 and sys.version_info < (3,2))
52+
53+class TestConstructor(SoupTest):
54+
55+ def test_short_unicode_input(self):
56+ data = u"<h1>éé</h1>"
57+ soup = self.soup(data)
58+ self.assertEqual(u"éé", soup.h1.string)
59+
60+ def test_embedded_null(self):
61+ data = u"<h1>foo\0bar</h1>"
62+ soup = self.soup(data)
63+ self.assertEqual(u"foo\0bar", soup.h1.string)
64+
65+
66+class TestDeprecatedConstructorArguments(SoupTest):
67+
68+ def test_parseOnlyThese_renamed_to_parse_only(self):
69+ with warnings.catch_warnings(record=True) as w:
70+ soup = self.soup("<a><b></b></a>", parseOnlyThese=SoupStrainer("b"))
71+ msg = str(w[0].message)
72+ self.assertTrue("parseOnlyThese" in msg)
73+ self.assertTrue("parse_only" in msg)
74+ self.assertEqual(b"<b></b>", soup.encode())
75+
76+ def test_fromEncoding_renamed_to_from_encoding(self):
77+ with warnings.catch_warnings(record=True) as w:
78+ utf8 = b"\xc3\xa9"
79+ soup = self.soup(utf8, fromEncoding="utf8")
80+ msg = str(w[0].message)
81+ self.assertTrue("fromEncoding" in msg)
82+ self.assertTrue("from_encoding" in msg)
83+ self.assertEqual("utf8", soup.original_encoding)
84+
85+ def test_unrecognized_keyword_argument(self):
86+ self.assertRaises(
87+ TypeError, self.soup, "<a>", no_such_argument=True)
88+
89+class TestWarnings(SoupTest):
90+
91+ def test_disk_file_warning(self):
92+ filehandle = tempfile.NamedTemporaryFile()
93+ filename = filehandle.name
94+ try:
95+ with warnings.catch_warnings(record=True) as w:
96+ soup = self.soup(filename)
97+ msg = str(w[0].message)
98+ self.assertTrue("looks like a filename" in msg)
99+ finally:
100+ filehandle.close()
101+
102+ # The file no longer exists, so Beautiful Soup will no longer issue the warning.
103+ with warnings.catch_warnings(record=True) as w:
104+ soup = self.soup(filename)
105+ self.assertEqual(0, len(w))
106+
107+ def test_url_warning(self):
108+ with warnings.catch_warnings(record=True) as w:
109+ soup = self.soup("http://www.crummy.com/")
110+ msg = str(w[0].message)
111+ self.assertTrue("looks like a URL" in msg)
112+
113+ with warnings.catch_warnings(record=True) as w:
114+ soup = self.soup("http://www.crummy.com/ is great")
115+ self.assertEqual(0, len(w))
116+
117+class TestSelectiveParsing(SoupTest):
118+
119+ def test_parse_with_soupstrainer(self):
120+ markup = "No<b>Yes</b><a>No<b>Yes <c>Yes</c></b>"
121+ strainer = SoupStrainer("b")
122+ soup = self.soup(markup, parse_only=strainer)
123+ self.assertEqual(soup.encode(), b"<b>Yes</b><b>Yes <c>Yes</c></b>")
124+
125+
126+class TestEntitySubstitution(unittest.TestCase):
127+ """Standalone tests of the EntitySubstitution class."""
128+ def setUp(self):
129+ self.sub = EntitySubstitution
130+
131+ def test_simple_html_substitution(self):
132+ # Unicode characters corresponding to named HTML entites
133+ # are substituted, and no others.
134+ s = u"foo\u2200\N{SNOWMAN}\u00f5bar"
135+ self.assertEqual(self.sub.substitute_html(s),
136+ u"foo&forall;\N{SNOWMAN}&otilde;bar")
137+
138+ def test_smart_quote_substitution(self):
139+ # MS smart quotes are a common source of frustration, so we
140+ # give them a special test.
141+ quotes = b"\x91\x92foo\x93\x94"
142+ dammit = UnicodeDammit(quotes)
143+ self.assertEqual(self.sub.substitute_html(dammit.markup),
144+ "&lsquo;&rsquo;foo&ldquo;&rdquo;")
145+
146+ def test_xml_converstion_includes_no_quotes_if_make_quoted_attribute_is_false(self):
147+ s = 'Welcome to "my bar"'
148+ self.assertEqual(self.sub.substitute_xml(s, False), s)
149+
150+ def test_xml_attribute_quoting_normally_uses_double_quotes(self):
151+ self.assertEqual(self.sub.substitute_xml("Welcome", True),
152+ '"Welcome"')
153+ self.assertEqual(self.sub.substitute_xml("Bob's Bar", True),
154+ '"Bob\'s Bar"')
155+
156+ def test_xml_attribute_quoting_uses_single_quotes_when_value_contains_double_quotes(self):
157+ s = 'Welcome to "my bar"'
158+ self.assertEqual(self.sub.substitute_xml(s, True),
159+ "'Welcome to \"my bar\"'")
160+
161+ def test_xml_attribute_quoting_escapes_single_quotes_when_value_contains_both_single_and_double_quotes(self):
162+ s = 'Welcome to "Bob\'s Bar"'
163+ self.assertEqual(
164+ self.sub.substitute_xml(s, True),
165+ '"Welcome to &quot;Bob\'s Bar&quot;"')
166+
167+ def test_xml_quotes_arent_escaped_when_value_is_not_being_quoted(self):
168+ quoted = 'Welcome to "Bob\'s Bar"'
169+ self.assertEqual(self.sub.substitute_xml(quoted), quoted)
170+
171+ def test_xml_quoting_handles_angle_brackets(self):
172+ self.assertEqual(
173+ self.sub.substitute_xml("foo<bar>"),
174+ "foo&lt;bar&gt;")
175+
176+ def test_xml_quoting_handles_ampersands(self):
177+ self.assertEqual(self.sub.substitute_xml("AT&T"), "AT&amp;T")
178+
179+ def test_xml_quoting_including_ampersands_when_they_are_part_of_an_entity(self):
180+ self.assertEqual(
181+ self.sub.substitute_xml("&Aacute;T&T"),
182+ "&amp;Aacute;T&amp;T")
183+
184+ def test_xml_quoting_ignoring_ampersands_when_they_are_part_of_an_entity(self):
185+ self.assertEqual(
186+ self.sub.substitute_xml_containing_entities("&Aacute;T&T"),
187+ "&Aacute;T&amp;T")
188+
189+ def test_quotes_not_html_substituted(self):
190+ """There's no need to do this except inside attribute values."""
191+ text = 'Bob\'s "bar"'
192+ self.assertEqual(self.sub.substitute_html(text), text)
193+
194+
195+class TestEncodingConversion(SoupTest):
196+ # Test Beautiful Soup's ability to decode and encode from various
197+ # encodings.
198+
199+ def setUp(self):
200+ super(TestEncodingConversion, self).setUp()
201+ self.unicode_data = u'<html><head><meta charset="utf-8"/></head><body><foo>Sacr\N{LATIN SMALL LETTER E WITH ACUTE} bleu!</foo></body></html>'
202+ self.utf8_data = self.unicode_data.encode("utf-8")
203+ # Just so you know what it looks like.
204+ self.assertEqual(
205+ self.utf8_data,
206+ b'<html><head><meta charset="utf-8"/></head><body><foo>Sacr\xc3\xa9 bleu!</foo></body></html>')
207+
208+ def test_ascii_in_unicode_out(self):
209+ # ASCII input is converted to Unicode. The original_encoding
210+ # attribute is set to 'utf-8', a superset of ASCII.
211+ chardet = bs4.dammit.chardet_dammit
212+ logging.disable(logging.WARNING)
213+ try:
214+ def noop(str):
215+ return None
216+ # Disable chardet, which will realize that the ASCII is ASCII.
217+ bs4.dammit.chardet_dammit = noop
218+ ascii = b"<foo>a</foo>"
219+ soup_from_ascii = self.soup(ascii)
220+ unicode_output = soup_from_ascii.decode()
221+ self.assertTrue(isinstance(unicode_output, unicode))
222+ self.assertEqual(unicode_output, self.document_for(ascii.decode()))
223+ self.assertEqual(soup_from_ascii.original_encoding.lower(), "utf-8")
224+ finally:
225+ logging.disable(logging.NOTSET)
226+ bs4.dammit.chardet_dammit = chardet
227+
228+ def test_unicode_in_unicode_out(self):
229+ # Unicode input is left alone. The original_encoding attribute
230+ # is not set.
231+ soup_from_unicode = self.soup(self.unicode_data)
232+ self.assertEqual(soup_from_unicode.decode(), self.unicode_data)
233+ self.assertEqual(soup_from_unicode.foo.string, u'Sacr\xe9 bleu!')
234+ self.assertEqual(soup_from_unicode.original_encoding, None)
235+
236+ def test_utf8_in_unicode_out(self):
237+ # UTF-8 input is converted to Unicode. The original_encoding
238+ # attribute is set.
239+ soup_from_utf8 = self.soup(self.utf8_data)
240+ self.assertEqual(soup_from_utf8.decode(), self.unicode_data)
241+ self.assertEqual(soup_from_utf8.foo.string, u'Sacr\xe9 bleu!')
242+
243+ def test_utf8_out(self):
244+ # The internal data structures can be encoded as UTF-8.
245+ soup_from_unicode = self.soup(self.unicode_data)
246+ self.assertEqual(soup_from_unicode.encode('utf-8'), self.utf8_data)
247+
248+ @skipIf(
249+ PYTHON_2_PRE_2_7 or PYTHON_3_PRE_3_2,
250+ "Bad HTMLParser detected; skipping test of non-ASCII characters in attribute name.")
251+ def test_attribute_name_containing_unicode_characters(self):
252+ markup = u'<div><a \N{SNOWMAN}="snowman"></a></div>'
253+ self.assertEqual(self.soup(markup).div.encode("utf8"), markup.encode("utf8"))
254+
255+class TestUnicodeDammit(unittest.TestCase):
256+ """Standalone tests of UnicodeDammit."""
257+
258+ def test_unicode_input(self):
259+ markup = u"I'm already Unicode! \N{SNOWMAN}"
260+ dammit = UnicodeDammit(markup)
261+ self.assertEqual(dammit.unicode_markup, markup)
262+
263+ def test_smart_quotes_to_unicode(self):
264+ markup = b"<foo>\x91\x92\x93\x94</foo>"
265+ dammit = UnicodeDammit(markup)
266+ self.assertEqual(
267+ dammit.unicode_markup, u"<foo>\u2018\u2019\u201c\u201d</foo>")
268+
269+ def test_smart_quotes_to_xml_entities(self):
270+ markup = b"<foo>\x91\x92\x93\x94</foo>"
271+ dammit = UnicodeDammit(markup, smart_quotes_to="xml")
272+ self.assertEqual(
273+ dammit.unicode_markup, "<foo>&#x2018;&#x2019;&#x201C;&#x201D;</foo>")
274+
275+ def test_smart_quotes_to_html_entities(self):
276+ markup = b"<foo>\x91\x92\x93\x94</foo>"
277+ dammit = UnicodeDammit(markup, smart_quotes_to="html")
278+ self.assertEqual(
279+ dammit.unicode_markup, "<foo>&lsquo;&rsquo;&ldquo;&rdquo;</foo>")
280+
281+ def test_smart_quotes_to_ascii(self):
282+ markup = b"<foo>\x91\x92\x93\x94</foo>"
283+ dammit = UnicodeDammit(markup, smart_quotes_to="ascii")
284+ self.assertEqual(
285+ dammit.unicode_markup, """<foo>''""</foo>""")
286+
287+ def test_detect_utf8(self):
288+ utf8 = b"\xc3\xa9"
289+ dammit = UnicodeDammit(utf8)
290+ self.assertEqual(dammit.unicode_markup, u'\xe9')
291+ self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
292+
293+ def test_convert_hebrew(self):
294+ hebrew = b"\xed\xe5\xec\xf9"
295+ dammit = UnicodeDammit(hebrew, ["iso-8859-8"])
296+ self.assertEqual(dammit.original_encoding.lower(), 'iso-8859-8')
297+ self.assertEqual(dammit.unicode_markup, u'\u05dd\u05d5\u05dc\u05e9')
298+
299+ def test_dont_see_smart_quotes_where_there_are_none(self):
300+ utf_8 = b"\343\202\261\343\203\274\343\202\277\343\202\244 Watch"
301+ dammit = UnicodeDammit(utf_8)
302+ self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
303+ self.assertEqual(dammit.unicode_markup.encode("utf-8"), utf_8)
304+
305+ def test_ignore_inappropriate_codecs(self):
306+ utf8_data = u"Räksmörgås".encode("utf-8")
307+ dammit = UnicodeDammit(utf8_data, ["iso-8859-8"])
308+ self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
309+
310+ def test_ignore_invalid_codecs(self):
311+ utf8_data = u"Räksmörgås".encode("utf-8")
312+ for bad_encoding in ['.utf8', '...', 'utF---16.!']:
313+ dammit = UnicodeDammit(utf8_data, [bad_encoding])
314+ self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
315+
316+ def test_detect_html5_style_meta_tag(self):
317+
318+ for data in (
319+ b'<html><meta charset="euc-jp" /></html>',
320+ b"<html><meta charset='euc-jp' /></html>",
321+ b"<html><meta charset=euc-jp /></html>",
322+ b"<html><meta charset=euc-jp/></html>"):
323+ dammit = UnicodeDammit(data, is_html=True)
324+ self.assertEqual(
325+ "euc-jp", dammit.original_encoding)
326+
327+ def test_last_ditch_entity_replacement(self):
328+ # This is a UTF-8 document that contains bytestrings
329+ # completely incompatible with UTF-8 (ie. encoded with some other
330+ # encoding).
331+ #
332+ # Since there is no consistent encoding for the document,
333+ # Unicode, Dammit will eventually encode the document as UTF-8
334+ # and encode the incompatible characters as REPLACEMENT
335+ # CHARACTER.
336+ #
337+ # If chardet is installed, it will detect that the document
338+ # can be converted into ISO-8859-1 without errors. This happens
339+ # to be the wrong encoding, but it is a consistent encoding, so the
340+ # code we're testing here won't run.
341+ #
342+ # So we temporarily disable chardet if it's present.
343+ doc = b"""\357\273\277<?xml version="1.0" encoding="UTF-8"?>
344+<html><b>\330\250\330\252\330\261</b>
345+<i>\310\322\321\220\312\321\355\344</i></html>"""
346+ chardet = bs4.dammit.chardet_dammit
347+ logging.disable(logging.WARNING)
348+ try:
349+ def noop(str):
350+ return None
351+ bs4.dammit.chardet_dammit = noop
352+ dammit = UnicodeDammit(doc)
353+ self.assertEqual(True, dammit.contains_replacement_characters)
354+ self.assertTrue(u"\ufffd" in dammit.unicode_markup)
355+
356+ soup = BeautifulSoup(doc, "html.parser")
357+ self.assertTrue(soup.contains_replacement_characters)
358+ finally:
359+ logging.disable(logging.NOTSET)
360+ bs4.dammit.chardet_dammit = chardet
361+
362+ def test_byte_order_mark_removed(self):
363+ # A document written in UTF-16LE will have its byte order marker stripped.
364+ data = b'\xff\xfe<\x00a\x00>\x00\xe1\x00\xe9\x00<\x00/\x00a\x00>\x00'
365+ dammit = UnicodeDammit(data)
366+ self.assertEqual(u"<a>áé</a>", dammit.unicode_markup)
367+ self.assertEqual("utf-16le", dammit.original_encoding)
368+
369+ def test_detwingle(self):
370+ # Here's a UTF8 document.
371+ utf8 = (u"\N{SNOWMAN}" * 3).encode("utf8")
372+
373+ # Here's a Windows-1252 document.
374+ windows_1252 = (
375+ u"\N{LEFT DOUBLE QUOTATION MARK}Hi, I like Windows!"
376+ u"\N{RIGHT DOUBLE QUOTATION MARK}").encode("windows_1252")
377+
378+ # Through some unholy alchemy, they've been stuck together.
379+ doc = utf8 + windows_1252 + utf8
380+
381+ # The document can't be turned into UTF-8:
382+ self.assertRaises(UnicodeDecodeError, doc.decode, "utf8")
383+
384+ # Unicode, Dammit thinks the whole document is Windows-1252,
385+ # and decodes it into "☃☃☃“Hi, I like Windows!”☃☃☃"
386+
387+ # But if we run it through fix_embedded_windows_1252, it's fixed:
388+
389+ fixed = UnicodeDammit.detwingle(doc)
390+ self.assertEqual(
391+ u"☃☃☃“Hi, I like Windows!”☃☃☃", fixed.decode("utf8"))
392+
393+ def test_detwingle_ignores_multibyte_characters(self):
394+ # Each of these characters has a UTF-8 representation ending
395+ # in \x93. \x93 is a smart quote if interpreted as
396+ # Windows-1252. But our code knows to skip over multibyte
397+ # UTF-8 characters, so they'll survive the process unscathed.
398+ for tricky_unicode_char in (
399+ u"\N{LATIN SMALL LIGATURE OE}", # 2-byte char '\xc5\x93'
400+ u"\N{LATIN SUBSCRIPT SMALL LETTER X}", # 3-byte char '\xe2\x82\x93'
401+ u"\xf0\x90\x90\x93", # This is a CJK character, not sure which one.
402+ ):
403+ input = tricky_unicode_char.encode("utf8")
404+ self.assertTrue(input.endswith(b'\x93'))
405+ output = UnicodeDammit.detwingle(input)
406+ self.assertEqual(output, input)
407+
408+class TestNamedspacedAttribute(SoupTest):
409+
410+ def test_name_may_be_none(self):
411+ a = NamespacedAttribute("xmlns", None)
412+ self.assertEqual(a, "xmlns")
413+
414+ def test_attribute_is_equivalent_to_colon_separated_string(self):
415+ a = NamespacedAttribute("a", "b")
416+ self.assertEqual("a:b", a)
417+
418+ def test_attributes_are_equivalent_if_prefix_and_name_identical(self):
419+ a = NamespacedAttribute("a", "b", "c")
420+ b = NamespacedAttribute("a", "b", "c")
421+ self.assertEqual(a, b)
422+
423+ # The actual namespace is not considered.
424+ c = NamespacedAttribute("a", "b", None)
425+ self.assertEqual(a, c)
426+
427+ # But name and prefix are important.
428+ d = NamespacedAttribute("a", "z", "c")
429+ self.assertNotEqual(a, d)
430+
431+ e = NamespacedAttribute("z", "b", "c")
432+ self.assertNotEqual(a, e)
433+
434+
435+class TestAttributeValueWithCharsetSubstitution(unittest.TestCase):
436+
437+ def test_content_meta_attribute_value(self):
438+ value = CharsetMetaAttributeValue("euc-jp")
439+ self.assertEqual("euc-jp", value)
440+ self.assertEqual("euc-jp", value.original_value)
441+ self.assertEqual("utf8", value.encode("utf8"))
442+
443+
444+ def test_content_meta_attribute_value(self):
445+ value = ContentMetaAttributeValue("text/html; charset=euc-jp")
446+ self.assertEqual("text/html; charset=euc-jp", value)
447+ self.assertEqual("text/html; charset=euc-jp", value.original_value)
448+ self.assertEqual("text/html; charset=utf8", value.encode("utf8"))
449
450=== modified file 'bs4/tests/test_soup.py'
451--- bs4/tests/test_soup.py 2014-05-03 14:19:04 +0000
452+++ bs4/tests/test_soup.py 2014-11-14 17:22:47 +0000
453@@ -271,9 +271,9 @@
454 dammit.unicode_markup, """<foo>''""</foo>""")
455
456 def test_detect_utf8(self):
457- utf8 = b"\xc3\xa9"
458+ utf8 = b"\xc3\xa9\xc3\xa9"
459 dammit = UnicodeDammit(utf8)
460- self.assertEqual(dammit.unicode_markup, u'\xe9')
461+ self.assertEqual(dammit.unicode_markup, u'\xe9\xe9')
462 self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
463
464 def test_convert_hebrew(self):
465
466=== modified file 'debian/changelog'
467--- debian/changelog 2014-10-26 09:32:48 +0000
468+++ debian/changelog 2014-11-14 17:22:47 +0000
469@@ -1,3 +1,9 @@
470+beautifulsoup4 (4.3.2-2ubuntu2) vivid; urgency=medium
471+
472+ * Work around tests failure when chardet is installed.
473+
474+ -- Dmitry Shachnev <mitya57@ubuntu.com> Fri, 14 Nov 2014 13:49:28 +0300
475+
476 beautifulsoup4 (4.3.2-2ubuntu1) vivid; urgency=medium
477
478 * Merge from Debian unstable. Remaining changes:
479
480=== added file 'debian/patches/fix-chardet-failure'
481--- debian/patches/fix-chardet-failure 1970-01-01 00:00:00 +0000
482+++ debian/patches/fix-chardet-failure 2014-11-14 17:22:47 +0000
483@@ -0,0 +1,21 @@
484+Description: fix tests failure when chardet is used
485+ This only happens when chardet is installed (and thus used for
486+ encoding detection).
487+Author: Dmitry Shachnev <mitya57@ubuntu.com>
488+Forwarded: yes, https://code.launchpad.net/~mitya57/beautifulsoup/tests/+merge/241832
489+Last-Update: 2014-11-14
490+
491+--- a/bs4/tests/test_soup.py
492++++ b/bs4/tests/test_soup.py
493+@@ -271,9 +271,9 @@
494+ dammit.unicode_markup, """<foo>''""</foo>""")
495+
496+ def test_detect_utf8(self):
497+- utf8 = b"\xc3\xa9"
498++ utf8 = b"\xc3\xa9\xc3\xa9"
499+ dammit = UnicodeDammit(utf8)
500+- self.assertEqual(dammit.unicode_markup, u'\xe9')
501++ self.assertEqual(dammit.unicode_markup, u'\xe9\xe9')
502+ self.assertEqual(dammit.original_encoding.lower(), 'utf-8')
503+
504+ def test_convert_hebrew(self):
505
506=== modified file 'debian/patches/series'
507--- debian/patches/series 2014-10-16 22:57:41 +0000
508+++ debian/patches/series 2014-11-14 17:22:47 +0000
509@@ -1,1 +1,2 @@
510 python3.4-warnings
511+fix-chardet-failure

Subscribers

People subscribed via source and target branches