Merge lp:~cjwatson/soupmatchers/python3 into lp:soupmatchers

Proposed by Colin Watson
Status: Merged
Merged at revision: 61
Proposed branch: lp:~cjwatson/soupmatchers/python3
Merge into: lp:soupmatchers
Diff against target: 730 lines (+124/-100)
5 files modified
README (+38/-37)
setup.py (+11/-1)
soupmatchers/__init__.py (+13/-8)
soupmatchers/tests/__init__.py (+13/-10)
soupmatchers/tests/test_matchers.py (+49/-44)
To merge this branch: bzr merge lp:~cjwatson/soupmatchers/python3
Reviewer | Review Type | Date Requested | Status
James Westby | | | Approve
Review via email: mp+332595@code.launchpad.net

Commit message

Port to beautifulsoup4 and Python 3.

Description of the change

Since the interface exposed by this package is mostly about searching in existing text, just continuing to use native strings everywhere seems to work fine; we just need the usual Python 3 porting changes, an upgrade from BeautifulSoup 3 to beautifulsoup4, and some care around testtools.Content (whose second argument is a callable that returns an iterable of bytes objects).

To post a comment you must log in.
Revision history for this message
James Westby (james-w) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'README'
2--- README 2010-07-12 16:43:33 +0000
3+++ README 2017-10-20 22:33:14 +0000
4@@ -20,25 +20,25 @@
5 BeautifulSoup
6 -------------
7
8- >>> import BeautifulSoup
9- >>> root = BeautifulSoup.BeautifulSoup(html)
10+ >>> import bs4
11+ >>> root = bs4.BeautifulSoup(html, "html.parser")
12
13 It is an HTML parsing library that includes
14 a way to search the document for matching tags. If you had a parsed
15 representation of your document you could find the above part by doing
16
17 >>> import re
18- >>> anchor_tags = root.findAll(
19+ >>> anchor_tags = root.find_all(
20 ... "a", attrs={"href": "https://launchpad.net/testtools",
21 ... "class": "awesome"})
22- >>> print anchor_tags
23- [<a href="https://launchpad.net/testtools" class="awesome">testtools <b>rocks</b></a>]
24+ >>> print(anchor_tags)
25+ [<a class="awesome" href="https://launchpad.net/testtools">testtools <b>rocks</b></a>]
26
27-which would return you a list with (lets assume) a single entry, the
28-BeautifulSoup.Tag for the <a>. You can locate the nested tag with:
29+which would return you a list with (let's assume) a single entry, the
30+bs4.Tag for the <a>. You can locate the nested tag with:
31
32 >>> anchor_tag = anchor_tags[0]
33- >>> anchor_tag.findAll("b")
34+ >>> anchor_tag.find_all("b")
35 [<b>rocks</b>]
36
37 which will again return a single item list.
38@@ -65,10 +65,10 @@
39 has a certain css class, and mentions testtools in the anchor text.
40
41 >>> import soupmatchers
42- >>> print soupmatchers.Tag(
43+ >>> print(soupmatchers.Tag(
44 ... "link to testtols", "a",
45 ... attrs={"href": "https://launchpad.net/testtools",
46- ... "class": "awesome"})
47+ ... "class": "awesome"}))
48 Tag("link to testtols",
49 <a class='awesome' href='https://launchpad.net/testtools' ...>...</a>)
50
51@@ -81,29 +81,29 @@
52 Further though, soupmatchers allows you to specify text that the
53 tag must contain to match.
54
55- >>> print soupmatchers.Tag(
56- ... "link to testtols", "a",
57+ >>> print(soupmatchers.Tag(
58+ ... "link to testtools", "a",
59 ... attrs={"href": "https://launchpad.net/testtools",
60- ... "class": "awesome"}, text=re.compile(r"testtools"))
61- Tag("link to testtols",
62+ ... "class": "awesome"}, text=re.compile(r"testtools")))
63+ Tag("link to testtools",
64 <a class='awesome' href='https://launchpad.net/testtools'
65 ...>re.compile('testtools') ...</a>)
66
67 Now lets define a create a matcher that will match the bold tag from above.
68
69- >>> print soupmatchers.Tag("bold rocks", "b", text="rocks")
70+ >>> print(soupmatchers.Tag("bold rocks", "b", text="rocks"))
71 Tag("bold rocks", <b ...>rocks ...</b>)
72
73 Obviously this would allow the bold tag to be outside of the anchor tag, but
74 no fear, we can create a matcher that will check that one is inside the
75 other, simply use the Within matcher to combine the two.
76
77- >>> print soupmatchers.Within(
78+ >>> print(soupmatchers.Within(
79 ... soupmatchers.Tag(
80 ... "link to testtools", "a",
81 ... attrs={"href": "https://launchpad.net/testtools",
82 ... "class": "awesome"}, text=re.compile(r"testtools")),
83- ... soupmatchers.Tag("bold rocks", "b", text="rocks"))
84+ ... soupmatchers.Tag("bold rocks", "b", text="rocks")))
85 Tag("bold rocks", <b ...>rocks ...</b>) within Tag("link to testtools",
86 <a class='awesome' href='https://launchpad.net/testtools'
87 ...>re.compile('testtools') ...</a>)
88@@ -115,7 +115,8 @@
89 mean you have to go to the trouble of parsing every time you want to use
90 them. To simplify that you can use
91
92- >>> print soupmatchers.HTMLContains(soupmatchers.Tag("some image", "image"))
93+ >>> print(soupmatchers.HTMLContains(
94+ ... soupmatchers.Tag("some image", "image")))
95 HTML contains [Tag("some image", <image ...>...</image>)]
96
97 to create a matcher that will parse the string before checking the tag
98@@ -135,7 +136,7 @@
99 >>> import testtools
100 >>> matcher = testtools.matchers.Equals(1)
101 >>> match = matcher.match(1)
102- >>> print match
103+ >>> print(match)
104 None
105
106 the returned match will be None if the matcher matches the content that
107@@ -159,10 +160,10 @@
108 attribute of the passed object against an expected value, and also check
109 the content attribute against any matcher you wish to specify.
110
111- >>> print soupmatchers.ResponseHas(
112+ >>> print(soupmatchers.ResponseHas(
113 ... status_code=404,
114 ... content_matches=soupmatchers.HTMLContains(soupmatchers.Tag(
115- ... "an anchor", "a")))
116+ ... "an anchor", "a"))))
117 ResponseHas(status_code=404, content_matches=HTML contains
118 [Tag("an anchor", <a ...>...</a>)])
119
120@@ -171,8 +172,8 @@
121 As working with HTML is very common, there's an easier way to write the
122 above.
123
124- >>> print soupmatchers.HTMLResponseHas(
125- ... status_code=404, html_matches=soupmatchers.Tag("an anchor", "a"))
126+ >>> print(soupmatchers.HTMLResponseHas(
127+ ... status_code=404, html_matches=soupmatchers.Tag("an anchor", "a")))
128 HTMLResponseHas(status_code=404, content_matches=HTML contains
129 [Tag("an anchor", <a ...>...</a>)])
130
131@@ -201,12 +202,12 @@
132 ... html_matches=combined_matcher)
133 >>> #self.assertThat(response, response_matcher)
134 >>> match = response_matcher.match(ExpectedResponse())
135- >>> print match
136+ >>> print(match)
137 None
138 >>> match = response_matcher.match(UnexpectedResponse())
139- >>> print repr(match) #doctest: +ELLIPSIS
140+ >>> print(repr(match)) #doctest: +ELLIPSIS
141 <soupmatchers.TagMismatch object at ...>
142- >>> print match.describe()
143+ >>> print(match.describe())
144 Matched 0 times
145 Here is some information that may be useful:
146 0 matches for "bold rocks" in the document.
147@@ -218,7 +219,7 @@
148 Checking the number of times a pattern is matched
149 -------------------------------------------------
150
151-Remember how findAll returned a list, and we just assumed that it only found
152+Remember how find_all returned a list, and we just assumed that it only found
153 one tag in the example? Well, the matchers allow you to not just assume that,
154 they allow you to assert that. That means that you can assert that
155 a particular tag only occurs once by passing
156@@ -232,10 +233,10 @@
157 >>> html_matcher = soupmatchers.HTMLContains(tag_matcher)
158 >>> content = '<a href="https://launchpad.net/testtools"></a>'
159 >>> match = html_matcher.match(content)
160- >>> print match
161+ >>> print(match)
162 None
163 >>> match = html_matcher.match(content * 2)
164- >>> print match.describe()
165+ >>> print(match.describe())
166 Matched 2 times
167 The matches were:
168 <a href="https://launchpad.net/testtools"></a>
169@@ -251,7 +252,7 @@
170 >>> html_matcher = soupmatchers.HTMLContains(tag_matcher)
171 >>> content = '<a href="https://launchpad.net/testtools"></a>'
172 >>> match = html_matcher.match(content)
173- >>> print match.describe()
174+ >>> print(match.describe())
175 Matched 1 time
176 The match was:
177 <a href="https://launchpad.net/testtools"></a>
178@@ -275,9 +276,9 @@
179
180 >>> matcher = soupmatchers.HTMLContains(soupmatchers.Tag("bold", "b"))
181 >>> mismatch = matcher.match("<image></image>")
182- >>> print mismatch.get_details().keys()
183+ >>> print(list(mismatch.get_details().keys()))
184 ['html']
185- >>> print ''.join(list(mismatch.get_details()["html"].iter_bytes()))
186+ >>> print(''.join(list(mismatch.get_details()["html"].iter_text())))
187 <image></image>
188
189 If you use assertThat then it will automatically call addDetails with this
190@@ -294,7 +295,7 @@
191 >>> matcher = soupmatchers.HTMLContains(soupmatchers.Tag(
192 ... "no bold", "b", count=0))
193 >>> mismatch = matcher.match("<b>rocks</b>")
194- >>> print mismatch.describe()
195+ >>> print(mismatch.describe())
196 Matched 1 time
197 The match was:
198 <b>rocks</b>
199@@ -309,7 +310,7 @@
200 ... "class": "awesome"}))
201 >>> mismatch = matcher.match(
202 ... "<a href='https://launchpad.net/testtools'></a>")
203- >>> print mismatch.describe()
204+ >>> print(mismatch.describe())
205 Matched 0 times
206 Here is some information that may be useful:
207 1 matches for "testtools link" when attribute class="awesome" is not a
208@@ -319,7 +320,7 @@
209 ... soupmatchers.Tag("bold rocks", "b", text="rocks"))
210 >>> mismatch = matcher.match(
211 ... "<b>is awesome</b>")
212- >>> print mismatch.describe()
213+ >>> print(mismatch.describe())
214 Matched 0 times
215 Here is some information that may be useful:
216 1 matches for "bold rocks" when text="rocks" is not a requirement.
217@@ -342,11 +343,11 @@
218 >>> body_matcher = soupmatchers.Tag("the body", "body")
219 >>> matcher = soupmatchers.HTMLContains(
220 ... soupmatchers.Within(body_matcher, child_matcher))
221- >>> print matcher
222+ >>> print(matcher)
223 HTML contains [Tag("bold rocks", <b ...>rocks ...</b>)
224 within Tag("the body", <body ...>...</body>)]
225 >>> mismatch = matcher.match("<b>rocks</b><body></body>")
226- >>> print mismatch.describe()
227+ >>> print(mismatch.describe())
228 Matched 0 times
229 Here is some information that may be useful:
230 1 matches for "bold rocks" in the document.
231
232=== modified file 'setup.py'
233--- setup.py 2012-02-08 00:44:34 +0000
234+++ setup.py 2017-10-20 22:33:14 +0000
235@@ -17,5 +17,15 @@
236 to your TestCase hierarchy because it makes use of testtools
237 Matchers.'''),
238 setup_requires=['setuptools'],
239- install_requires=['testtools>0.9.3', 'BeautifulSoup'],
240+ install_requires=['testtools>0.9.3', 'beautifulsoup4'],
241+ classifiers=[
242+ "Development Status :: 5 - Production/Stable",
243+ "Intended Audience :: Developers",
244+ "License :: OSI Approved :: Eclipse Public License 1.0 (EPL-1.0)",
245+ "Programming Language :: Python",
246+ "Programming Language :: Python :: 2",
247+ "Programming Language :: Python :: 3",
248+ "Topic :: Text Processing :: Markup :: HTML",
249+ "Topic :: Software Development :: Testing",
250+ ],
251 )
252
253=== modified file 'soupmatchers/__init__.py'
254--- soupmatchers/__init__.py 2012-02-08 00:43:44 +0000
255+++ soupmatchers/__init__.py 2017-10-20 22:33:14 +0000
256@@ -23,7 +23,7 @@
257 See the README for more information.
258 """
259
260-import BeautifulSoup
261+import bs4
262
263 from testtools import matchers
264 from testtools.content import Content
265@@ -47,7 +47,7 @@
266 if self.html is not None:
267 return {
268 "html": Content(ContentType("text", "html"),
269- lambda: self.html)
270+ lambda: [self.html.encode("UTF-8")])
271 }
272 return {}
273
274@@ -125,7 +125,7 @@
275
276 def match(self, content):
277 if len(self.matchers) > 0:
278- parsed_content = BeautifulSoup.BeautifulSoup(content)
279+ parsed_content = bs4.BeautifulSoup(content, "html.parser")
280 for matcher in self.matchers:
281 match = matcher.match(parsed_content)
282 if match is not None:
283@@ -171,7 +171,8 @@
284 def get_details(self):
285 details = {
286 "html": Content(
287- ContentType("text", "html"), lambda: [str(self.content)])
288+ ContentType("text", "html"),
289+ lambda: [str(self.content).encode("UTF-8")])
290 }
291 return details
292
293@@ -185,6 +186,10 @@
294 or self.content != other.content
295 or self.matches != other.matches)
296
297+ def __repr__(self):
298+ return "<soupmatchers.TagMismatch object at %x attributes=%r>" % (
299+ id(self), self.__dict__)
300+
301
302 class DocumentPart(matchers.Matcher):
303
304@@ -266,8 +271,8 @@
305 is a key in the dict, or True, which matches any tag name.
306
307 It is also possible to pass a callable as a tag name. The callable
308- should take a BeautifulSoup.Tag object as the argument and
309- return a boolean, with True indicating that the tag should match.
310+ should take a bs4.Tag object as the argument and return a boolean,
311+ with True indicating that the tag should match.
312
313 attrs is a dict that defines what attributes the tag should have.
314 The keys are the names of attributes and the values define the
315@@ -324,12 +329,12 @@
316 def _check_text(self, candidates, text):
317 if len(candidates) > 0 and text is not _not_passed:
318 for candidate in candidates[:]:
319- texts = candidate.findAll(text=text)
320+ texts = candidate.find_all(text=text)
321 if len(texts) < 1:
322 candidates.remove(candidate)
323
324 def _get_matches(self, html, attrs, text):
325- candidates = list(html.findAll(self.tag_type, attrs=attrs))
326+ candidates = list(html.find_all(self.tag_type, attrs=attrs))
327 self._check_text(candidates, text)
328 return candidates
329
330
331=== modified file 'soupmatchers/tests/__init__.py'
332--- soupmatchers/tests/__init__.py 2012-02-08 00:36:21 +0000
333+++ soupmatchers/tests/__init__.py 2017-10-20 22:33:14 +0000
334@@ -1,19 +1,22 @@
335+from __future__ import print_function
336+
337+
338 def load_tests(loader, standard_tests, pattern):
339 import doctest
340 import os
341 import sys
342- import unittest
343- suite = unittest.TestSuite()
344- loader = unittest.TestLoader()
345- suite.addTest(loader.loadTestsFromName(__name__))
346+
347+ this_dir = os.path.dirname(__file__)
348+ standard_tests.addTests(
349+ loader.discover(start_dir=this_dir, pattern=pattern))
350 source_readme_path = os.path.join(
351- os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "README")
352+ os.path.dirname(os.path.dirname(this_dir)), "README")
353 if os.path.exists(source_readme_path):
354- suite.addTest(
355- doctest.DocFileTest(os.path.relpath(
356- source_readme_path, os.path.dirname(__file__)),
357- optionflags=doctest.NORMALIZE_WHITESPACE))
358+ standard_tests.addTest(
359+ doctest.DocFileTest(os.path.relpath(source_readme_path, this_dir),
360+ optionflags=doctest.NORMALIZE_WHITESPACE,
361+ globs={'print_function': print_function}))
362 else:
363 sys.stderr.write("Warning: not testing README as it can't be found")
364- return suite
365+ return standard_tests
366
367
368=== modified file 'soupmatchers/tests/test_matchers.py'
369--- soupmatchers/tests/test_matchers.py 2012-02-08 00:43:44 +0000
370+++ soupmatchers/tests/test_matchers.py 2017-10-20 22:33:14 +0000
371@@ -2,7 +2,7 @@
372
373 import re
374
375-import BeautifulSoup
376+import bs4
377
378 from testtools import (
379 matchers as testtools_matchers,
380@@ -53,7 +53,8 @@
381 html = "<image></image>"
382 mismatch = matchers.StatusCodeMismatch(200, 404, html=html)
383 self.assertEqual(
384- {"html": Content(ContentType("text", "html"), lambda: html)},
385+ {"html": Content(
386+ ContentType("text", "html"), lambda: [html.encode("UTF-8")])},
387 mismatch.get_details())
388
389 def get_equal_mismatches(self):
390@@ -129,7 +130,9 @@
391 response = TestResponse(status_code=404, content=content)
392 match = matchers.ResponseHas(status_code=200).match(response)
393 self.assertEquals(
394- {"html": Content(ContentType("text", "html"), lambda: content)},
395+ {"html": Content(
396+ ContentType("text", "html"),
397+ lambda: [content.encode("UTF-8")])},
398 match.get_details())
399
400 def test_response_has_content_matches(self):
401@@ -195,7 +198,7 @@
402 html_matcher = matchers.HTMLContains(anchor_matcher)
403 content = "<image></image>"
404 match = html_matcher.match(content)
405- parsed_content = BeautifulSoup.BeautifulSoup(content)
406+ parsed_content = bs4.BeautifulSoup(content, "html.parser")
407 self.assertEquals(
408 matchers.TagMismatch(anchor_matcher, parsed_content, []), match)
409
410@@ -203,7 +206,7 @@
411 html_matcher = matchers.HTMLContains(anchor_matcher, image_matcher)
412 content = "<image></image>"
413 match = html_matcher.match(content)
414- parsed_content = BeautifulSoup.BeautifulSoup(content)
415+ parsed_content = bs4.BeautifulSoup(content, "html.parser")
416 self.assertEquals(
417 matchers.TagMismatch(anchor_matcher, parsed_content, []), match)
418
419@@ -211,7 +214,7 @@
420 html_matcher = matchers.HTMLContains(anchor_matcher, image_matcher)
421 content = "<b></b>"
422 match = html_matcher.match(content)
423- parsed_content = BeautifulSoup.BeautifulSoup(content)
424+ parsed_content = bs4.BeautifulSoup(content, "html.parser")
425 self.assertEquals(
426 matchers.TagMismatch(anchor_matcher, parsed_content, []), match)
427
428@@ -229,13 +232,13 @@
429
430 def test_describe_zero(self):
431 content = "<image></image>"
432- parsed_content = BeautifulSoup.BeautifulSoup(content)
433+ parsed_content = bs4.BeautifulSoup(content, "html.parser")
434 mismatch = matchers.TagMismatch(anchor_matcher, parsed_content, [])
435 self.assertEqual("Matched 0 times", mismatch.describe())
436
437 def test_describe_one(self):
438 content = "<image></image>"
439- parsed_content = BeautifulSoup.BeautifulSoup(content)
440+ parsed_content = bs4.BeautifulSoup(content, "html.parser")
441 mismatch = matchers.TagMismatch(
442 anchor_matcher, parsed_content, ["<a></a>"])
443 self.assertEqual(
444@@ -244,7 +247,7 @@
445
446 def test_describe_two(self):
447 content = "<image></image>"
448- parsed_content = BeautifulSoup.BeautifulSoup(content)
449+ parsed_content = bs4.BeautifulSoup(content, "html.parser")
450 mismatch = matchers.TagMismatch(
451 anchor_matcher, parsed_content, ["<a></a>", "<b></b>"])
452 self.assertEqual(
453@@ -253,15 +256,17 @@
454
455 def test_get_details(self):
456 content = "<image></image>"
457- parsed_content = BeautifulSoup.BeautifulSoup(content)
458+ parsed_content = bs4.BeautifulSoup(content, "html.parser")
459 mismatch = matchers.TagMismatch(anchor_matcher, parsed_content, [])
460 self.assertEqual(
461- {"html": Content(ContentType("text", "html"),
462- lambda: str(parsed_content))}, mismatch.get_details())
463+ {"html": Content(
464+ ContentType("text", "html"),
465+ lambda: [str(parsed_content).encode("UTF-8")])},
466+ mismatch.get_details())
467
468 def test_eq_equal(self):
469 content = "<image></image>"
470- parsed_content = BeautifulSoup.BeautifulSoup(content)
471+ parsed_content = bs4.BeautifulSoup(content, "html.parser")
472 matches = ["<a></a>"]
473 mismatch1 = matchers.TagMismatch(
474 anchor_matcher, parsed_content, matches)
475@@ -271,7 +276,7 @@
476
477 def test_eq_different_tag(self):
478 content = "<image></image>"
479- parsed_content = BeautifulSoup.BeautifulSoup(content)
480+ parsed_content = bs4.BeautifulSoup(content, "html.parser")
481 matches = ["<a></a>"]
482 mismatch1 = matchers.TagMismatch(
483 anchor_matcher, parsed_content, matches)
484@@ -281,9 +286,9 @@
485
486 def test_eq_different_content(self):
487 content1 = "<image></image>"
488- parsed_content1 = BeautifulSoup.BeautifulSoup(content1)
489+ parsed_content1 = bs4.BeautifulSoup(content1, "html.parser")
490 content2 = "<div></div>"
491- parsed_content2 = BeautifulSoup.BeautifulSoup(content2)
492+ parsed_content2 = bs4.BeautifulSoup(content2, "html.parser")
493 matches = ["<a></a>"]
494 mismatch1 = matchers.TagMismatch(
495 anchor_matcher, parsed_content1, matches)
496@@ -293,7 +298,7 @@
497
498 def test_eq_different_matches(self):
499 content = "<image></image>"
500- parsed_content = BeautifulSoup.BeautifulSoup(content)
501+ parsed_content = bs4.BeautifulSoup(content, "html.parser")
502 matches1 = ["<a></a>"]
503 matches2 = ["<b></b>"]
504 mismatch1 = matchers.TagMismatch(
505@@ -344,7 +349,7 @@
506 str(tag_matcher))
507
508 def get_match(self, matcher, content):
509- parsed_content = BeautifulSoup.BeautifulSoup(content)
510+ parsed_content = bs4.BeautifulSoup(content, "html.parser")
511 return matcher.match(parsed_content), parsed_content
512
513 def test_matches_one_instance(self):
514@@ -524,14 +529,14 @@
515
516 def test_get_extra_info_no_close_matches(self):
517 content = "<image></image>"
518- parsed_content = BeautifulSoup.BeautifulSoup(content)
519+ parsed_content = bs4.BeautifulSoup(content, "html.parser")
520 extra_info = anchor_matcher.get_extra_info([parsed_content], "")
521 self.assertEqual([], extra_info)
522
523 def test_get_extra_info_vary_attributes(self):
524 tag_matcher = matchers.Tag("foo link", "a", attrs={"href": "foo"})
525 content = "<a></a>"
526- parsed_content = BeautifulSoup.BeautifulSoup(content)
527+ parsed_content = bs4.BeautifulSoup(content, "html.parser")
528 close_matches = tag_matcher.get_extra_info([parsed_content], "")
529 self.assertEqual(
530 ['1 matches for "foo link" when attribute href="foo" '
531@@ -541,7 +546,7 @@
532 tag_matcher = matchers.Tag(
533 "foo bar link", "a", attrs={"href": "foo", "class": "bar"})
534 content = "<a href='foo'></a><a class='bar'></a>"
535- parsed_content = BeautifulSoup.BeautifulSoup(content)
536+ parsed_content = bs4.BeautifulSoup(content, "html.parser")
537 extra_info = tag_matcher.get_extra_info([parsed_content], "")
538 self.assertEqual(
539 ['1 matches for "foo bar link" when attribute class="bar" '
540@@ -553,7 +558,7 @@
541 def test_get_extra_info_vary_text(self):
542 tag_matcher = matchers.Tag("bold rocks", "b", text="rocks")
543 content = "<b>is awesome</b>"
544- parsed_content = BeautifulSoup.BeautifulSoup(content)
545+ parsed_content = bs4.BeautifulSoup(content, "html.parser")
546 extra_info = tag_matcher.get_extra_info([parsed_content], "")
547 self.assertEqual(
548 ['1 matches for \"bold rocks\" when text="rocks" is not a '
549@@ -563,16 +568,16 @@
550 tag_matcher = matchers.Tag(
551 "no bold rocks", "b", text="rocks", count=0)
552 content = "<b>is awesome</b>"
553- parsed_content = BeautifulSoup.BeautifulSoup(content)
554+ parsed_content = bs4.BeautifulSoup(content, "html.parser")
555 extra_info = tag_matcher.get_extra_info([parsed_content], "")
556 self.assertEqual([], extra_info)
557
558 def test_get_extra_info_multiple_roots(self):
559 tag_matcher = matchers.Tag("bold rocks", "b", text="rocks")
560 content1 = "<b>is awesome</b>"
561- parsed_content1 = BeautifulSoup.BeautifulSoup(content1)
562+ parsed_content1 = bs4.BeautifulSoup(content1, "html.parser")
563 content2 = "<b>is awesome</b>"
564- parsed_content2 = BeautifulSoup.BeautifulSoup(content2)
565+ parsed_content2 = bs4.BeautifulSoup(content2, "html.parser")
566 extra_info = tag_matcher.get_extra_info(
567 [parsed_content1, parsed_content2], "")
568 self.assertEqual(
569@@ -582,7 +587,7 @@
570 def test_get_extra_info_identifier_suffix(self):
571 tag_matcher = matchers.Tag("bold rocks", "b", text="rocks")
572 content = "<b>is awesome</b>"
573- parsed_content = BeautifulSoup.BeautifulSoup(content)
574+ parsed_content = bs4.BeautifulSoup(content, "html.parser")
575 extra_info = tag_matcher.get_extra_info([parsed_content],
576 " within foo")
577 self.assertEqual(
578@@ -644,7 +649,7 @@
579 def test_match_outer_not_matched(self):
580 within_matcher = matchers.Within(anchor_matcher, image_matcher)
581 content = "<image></image>"
582- parsed_content = BeautifulSoup.BeautifulSoup(content)
583+ parsed_content = bs4.BeautifulSoup(content, "html.parser")
584 mismatch = within_matcher.match(parsed_content)
585 self.assertEqual(
586 matchers.TagMismatch(within_matcher, parsed_content, []),
587@@ -653,7 +658,7 @@
588 def test_match_mismatch(self):
589 within_matcher = matchers.Within(anchor_matcher, image_matcher)
590 content = "<a></a><image></image>"
591- parsed_content = BeautifulSoup.BeautifulSoup(content)
592+ parsed_content = bs4.BeautifulSoup(content, "html.parser")
593 mismatch = within_matcher.match(parsed_content)
594 self.assertEqual(
595 matchers.TagMismatch(within_matcher, parsed_content, []),
596@@ -662,7 +667,7 @@
597 def test_match_match_in_one(self):
598 within_matcher = matchers.Within(anchor_matcher, image_matcher)
599 content = "<a><image></image></a>"
600- parsed_content = BeautifulSoup.BeautifulSoup(content)
601+ parsed_content = bs4.BeautifulSoup(content, "html.parser")
602 mismatch = within_matcher.match(parsed_content)
603 self.assertEqual(None, mismatch)
604
605@@ -676,7 +681,7 @@
606 content = '<div><div></div></div>'
607 child_matcher = matchers.Tag("div", 'div')
608 tag_matcher = matchers.Within(child_matcher, child_matcher)
609- parsed_content = BeautifulSoup.BeautifulSoup(content)
610+ parsed_content = bs4.BeautifulSoup(content, "html.parser")
611 match = tag_matcher.match(parsed_content)
612 self.assertEqual(None, match)
613
614@@ -690,7 +695,7 @@
615 content = '<div><span></span></div>'
616 child_matcher = matchers.Tag("div", 'div')
617 tag_matcher = matchers.Within(child_matcher, child_matcher)
618- parsed_content = BeautifulSoup.BeautifulSoup(content)
619+ parsed_content = bs4.BeautifulSoup(content, "html.parser")
620 match = tag_matcher.match(parsed_content)
621 self.assertEqual(
622 matchers.TagMismatch(tag_matcher, parsed_content, []),
623@@ -699,7 +704,7 @@
624 def test_get_extra_info_both_missing(self):
625 content = ''
626 tag_matcher = matchers.Within(anchor_matcher, image_matcher)
627- parsed_content = BeautifulSoup.BeautifulSoup(content)
628+ parsed_content = bs4.BeautifulSoup(content, "html.parser")
629 extra_info = tag_matcher.get_extra_info([parsed_content], "")
630 self.assertEqual(
631 ['0 matches for "%s" in the document.'
632@@ -711,7 +716,7 @@
633 def test_get_extra_info_inner_missing(self):
634 content = '<a></a>'
635 tag_matcher = matchers.Within(anchor_matcher, image_matcher)
636- parsed_content = BeautifulSoup.BeautifulSoup(content)
637+ parsed_content = bs4.BeautifulSoup(content, "html.parser")
638 extra_info = tag_matcher.get_extra_info([parsed_content], "")
639 self.assertEqual(
640 ['0 matches for "%s" in the document.'
641@@ -723,7 +728,7 @@
642 def test_get_extra_info_neither_missing(self):
643 content = '<a></a><image></image>'
644 tag_matcher = matchers.Within(anchor_matcher, image_matcher)
645- parsed_content = BeautifulSoup.BeautifulSoup(content)
646+ parsed_content = bs4.BeautifulSoup(content, "html.parser")
647 extra_info = tag_matcher.get_extra_info([parsed_content], "")
648 self.assertEqual(
649 ['1 matches for "%s" in the document.'
650@@ -736,7 +741,7 @@
651 content = '<a><b>is awesome</b></a>'
652 bold_matcher = matchers.Tag("bold rocks", "b", text="rocks")
653 tag_matcher = matchers.Within(anchor_matcher, bold_matcher)
654- parsed_content = BeautifulSoup.BeautifulSoup(content)
655+ parsed_content = bs4.BeautifulSoup(content, "html.parser")
656 extra_info = tag_matcher.get_extra_info([parsed_content], "")
657 self.assertEqual(
658 ['0 matches for "bold rocks" in the document.',
659@@ -749,7 +754,7 @@
660 def test_get_extra_info_multiple_parts(self):
661 content = '<a></a><image></image>'
662 tag_matcher = matchers.Within(anchor_matcher, image_matcher)
663- parsed_content = BeautifulSoup.BeautifulSoup(content)
664+ parsed_content = bs4.BeautifulSoup(content, "html.parser")
665 extra_info = tag_matcher.get_extra_info(
666 [parsed_content, parsed_content], "")
667 self.assertEqual(
668@@ -762,7 +767,7 @@
669 def test_get_extra_info_with_suffix(self):
670 content = '<a></a><image></image>'
671 tag_matcher = matchers.Within(anchor_matcher, image_matcher)
672- parsed_content = BeautifulSoup.BeautifulSoup(content)
673+ parsed_content = bs4.BeautifulSoup(content, "html.parser")
674 extra_info = tag_matcher.get_extra_info([parsed_content],
675 " within foo")
676 self.assertEqual(
677@@ -775,7 +780,7 @@
678 def test_get_matches(self):
679 within_matcher = matchers.Within(anchor_matcher, image_matcher)
680 content = "<a><image></image></a>"
681- parsed_content = BeautifulSoup.BeautifulSoup(content)
682+ parsed_content = bs4.BeautifulSoup(content, "html.parser")
683 matches = within_matcher.get_matches(parsed_content)
684 self.assertEqual([content], [str(a) for a in matches])
685
686@@ -910,7 +915,7 @@
687 def test_get_matches_outer_missing(self):
688 select_matcher = self.get_select_matcher_with_single_choice()
689 content = '<option value="choice1">Choice 1</option>'
690- parsed_content = BeautifulSoup.BeautifulSoup(content)
691+ parsed_content = bs4.BeautifulSoup(content, "html.parser")
692 matches = select_matcher.get_matches(parsed_content)
693 self.assertEqual([], matches)
694
695@@ -919,7 +924,7 @@
696 choices={"choice1": "Choice 1", "choice2": "Choice 2"})
697 content = ('<select name="foo"><option value="choice1">'
698 'Choice 1</option></select>')
699- parsed_content = BeautifulSoup.BeautifulSoup(content)
700+ parsed_content = bs4.BeautifulSoup(content, "html.parser")
701 matches = select_matcher.get_matches(parsed_content)
702 self.assertEqual([], matches)
703
704@@ -929,7 +934,7 @@
705 content = ('<select name="foo"><option value="choice1">'
706 'Choice 1</option><option value="choice2">'
707 'Choice 2</option></select>')
708- parsed_content = BeautifulSoup.BeautifulSoup(content)
709+ parsed_content = bs4.BeautifulSoup(content, "html.parser")
710 matches = select_matcher.get_matches(parsed_content)
711 self.assertEqual([content], [str(a) for a in matches])
712
713@@ -941,14 +946,14 @@
714 'Choice 2</option></select>')
715 unmatched_content = ('<select name="foo"><option value="choice1">'
716 'Choice 1</option></select>')
717- parsed_content = BeautifulSoup.BeautifulSoup(
718- content+unmatched_content)
719+ parsed_content = bs4.BeautifulSoup(
720+ content+unmatched_content, "html.parser")
721 matches = select_matcher.get_matches(parsed_content)
722 self.assertEqual([content], [str(a) for a in matches])
723
724 def test_match_twice(self):
725 select_matcher = self.get_select_matcher_with_single_choice()
726 content = '<select><option value="choice1">Choice 1</option></select>'
727- parsed_content = BeautifulSoup.BeautifulSoup(content*2)
728+ parsed_content = bs4.BeautifulSoup(content*2, "html.parser")
729 matches = select_matcher.get_matches(parsed_content)
730 self.assertEqual([content]*2, [str(a) for a in matches])

Subscribers

People subscribed via source and target branches

to all changes: