Merge lp:~cjwatson/soupmatchers/python3 into lp:soupmatchers
- python3
- Merge into trunk
Proposed by
Colin Watson
Status: | Merged |
---|---|
Merged at revision: | 61 |
Proposed branch: | lp:~cjwatson/soupmatchers/python3 |
Merge into: | lp:soupmatchers |
Diff against target: |
730 lines (+124/-100) 5 files modified
README (+38/-37), setup.py (+11/-1), soupmatchers/__init__.py (+13/-8), soupmatchers/tests/__init__.py (+13/-10), soupmatchers/tests/test_matchers.py (+49/-44) |
To merge this branch: | bzr merge lp:~cjwatson/soupmatchers/python3 |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
James Westby | Approve | ||
Review via email: mp+332595@code.launchpad.net |
Commit message
Port to beautifulsoup4 and Python 3.
Description of the change
Since the interface exposed by this package is mostly about searching in existing text, just continuing to use native strings everywhere seems to work fine. The port only needs the usual Python 3 porting changes, an upgrade from BeautifulSoup 3 to beautifulsoup4, and some care around testtools.Content, whose second argument is a callable that returns an iterable of bytes objects (hence the added `.encode("UTF-8")` calls).
To post a comment you must log in.
Revision history for this message
James Westby (james-w) : | # |
review:
Approve
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === modified file 'README' |
2 | --- README 2010-07-12 16:43:33 +0000 |
3 | +++ README 2017-10-20 22:33:14 +0000 |
4 | @@ -20,25 +20,25 @@ |
5 | BeautifulSoup |
6 | ------------- |
7 | |
8 | - >>> import BeautifulSoup |
9 | - >>> root = BeautifulSoup.BeautifulSoup(html) |
10 | + >>> import bs4 |
11 | + >>> root = bs4.BeautifulSoup(html, "html.parser") |
12 | |
13 | It is an HTML parsing library that includes |
14 | a way to search the document for matching tags. If you had a parsed |
15 | representation of your document you could find the above part by doing |
16 | |
17 | >>> import re |
18 | - >>> anchor_tags = root.findAll( |
19 | + >>> anchor_tags = root.find_all( |
20 | ... "a", attrs={"href": "https://launchpad.net/testtools", |
21 | ... "class": "awesome"}) |
22 | - >>> print anchor_tags |
23 | - [<a href="https://launchpad.net/testtools" class="awesome">testtools <b>rocks</b></a>] |
24 | + >>> print(anchor_tags) |
25 | + [<a class="awesome" href="https://launchpad.net/testtools">testtools <b>rocks</b></a>] |
26 | |
27 | -which would return you a list with (lets assume) a single entry, the |
28 | -BeautifulSoup.Tag for the <a>. You can locate the nested tag with: |
29 | +which would return you a list with (let's assume) a single entry, the |
30 | +bs4.Tag for the <a>. You can locate the nested tag with: |
31 | |
32 | >>> anchor_tag = anchor_tags[0] |
33 | - >>> anchor_tag.findAll("b") |
34 | + >>> anchor_tag.find_all("b") |
35 | [<b>rocks</b>] |
36 | |
37 | which will again return a single item list. |
38 | @@ -65,10 +65,10 @@ |
39 | has a certain css class, and mentions testtools in the anchor text. |
40 | |
41 | >>> import soupmatchers |
42 | - >>> print soupmatchers.Tag( |
43 | + >>> print(soupmatchers.Tag( |
44 | ... "link to testtols", "a", |
45 | ... attrs={"href": "https://launchpad.net/testtools", |
46 | - ... "class": "awesome"}) |
47 | + ... "class": "awesome"})) |
48 | Tag("link to testtols", |
49 | <a class='awesome' href='https://launchpad.net/testtools' ...>...</a>) |
50 | |
51 | @@ -81,29 +81,29 @@ |
52 | Further though, soupmatchers allows you to specify text that the |
53 | tag must contain to match. |
54 | |
55 | - >>> print soupmatchers.Tag( |
56 | - ... "link to testtols", "a", |
57 | + >>> print(soupmatchers.Tag( |
58 | + ... "link to testtools", "a", |
59 | ... attrs={"href": "https://launchpad.net/testtools", |
60 | - ... "class": "awesome"}, text=re.compile(r"testtools")) |
61 | - Tag("link to testtols", |
62 | + ... "class": "awesome"}, text=re.compile(r"testtools"))) |
63 | + Tag("link to testtools", |
64 | <a class='awesome' href='https://launchpad.net/testtools' |
65 | ...>re.compile('testtools') ...</a>) |
66 | |
67 | Now lets define a create a matcher that will match the bold tag from above. |
68 | |
69 | - >>> print soupmatchers.Tag("bold rocks", "b", text="rocks") |
70 | + >>> print(soupmatchers.Tag("bold rocks", "b", text="rocks")) |
71 | Tag("bold rocks", <b ...>rocks ...</b>) |
72 | |
73 | Obviously this would allow the bold tag to be outside of the anchor tag, but |
74 | no fear, we can create a matcher that will check that one is inside the |
75 | other, simply use the Within matcher to combine the two. |
76 | |
77 | - >>> print soupmatchers.Within( |
78 | + >>> print(soupmatchers.Within( |
79 | ... soupmatchers.Tag( |
80 | ... "link to testtools", "a", |
81 | ... attrs={"href": "https://launchpad.net/testtools", |
82 | ... "class": "awesome"}, text=re.compile(r"testtools")), |
83 | - ... soupmatchers.Tag("bold rocks", "b", text="rocks")) |
84 | + ... soupmatchers.Tag("bold rocks", "b", text="rocks"))) |
85 | Tag("bold rocks", <b ...>rocks ...</b>) within Tag("link to testtools", |
86 | <a class='awesome' href='https://launchpad.net/testtools' |
87 | ...>re.compile('testtools') ...</a>) |
88 | @@ -115,7 +115,8 @@ |
89 | mean you have to go to the trouble of parsing every time you want to use |
90 | them. To simplify that you can use |
91 | |
92 | - >>> print soupmatchers.HTMLContains(soupmatchers.Tag("some image", "image")) |
93 | + >>> print(soupmatchers.HTMLContains( |
94 | + ... soupmatchers.Tag("some image", "image"))) |
95 | HTML contains [Tag("some image", <image ...>...</image>)] |
96 | |
97 | to create a matcher that will parse the string before checking the tag |
98 | @@ -135,7 +136,7 @@ |
99 | >>> import testtools |
100 | >>> matcher = testtools.matchers.Equals(1) |
101 | >>> match = matcher.match(1) |
102 | - >>> print match |
103 | + >>> print(match) |
104 | None |
105 | |
106 | the returned match will be None if the matcher matches the content that |
107 | @@ -159,10 +160,10 @@ |
108 | attribute of the passed object against an expected value, and also check |
109 | the content attribute against any matcher you wish to specify. |
110 | |
111 | - >>> print soupmatchers.ResponseHas( |
112 | + >>> print(soupmatchers.ResponseHas( |
113 | ... status_code=404, |
114 | ... content_matches=soupmatchers.HTMLContains(soupmatchers.Tag( |
115 | - ... "an anchor", "a"))) |
116 | + ... "an anchor", "a")))) |
117 | ResponseHas(status_code=404, content_matches=HTML contains |
118 | [Tag("an anchor", <a ...>...</a>)]) |
119 | |
120 | @@ -171,8 +172,8 @@ |
121 | As working with HTML is very common, there's an easier way to write the |
122 | above. |
123 | |
124 | - >>> print soupmatchers.HTMLResponseHas( |
125 | - ... status_code=404, html_matches=soupmatchers.Tag("an anchor", "a")) |
126 | + >>> print(soupmatchers.HTMLResponseHas( |
127 | + ... status_code=404, html_matches=soupmatchers.Tag("an anchor", "a"))) |
128 | HTMLResponseHas(status_code=404, content_matches=HTML contains |
129 | [Tag("an anchor", <a ...>...</a>)]) |
130 | |
131 | @@ -201,12 +202,12 @@ |
132 | ... html_matches=combined_matcher) |
133 | >>> #self.assertThat(response, response_matcher) |
134 | >>> match = response_matcher.match(ExpectedResponse()) |
135 | - >>> print match |
136 | + >>> print(match) |
137 | None |
138 | >>> match = response_matcher.match(UnexpectedResponse()) |
139 | - >>> print repr(match) #doctest: +ELLIPSIS |
140 | + >>> print(repr(match)) #doctest: +ELLIPSIS |
141 | <soupmatchers.TagMismatch object at ...> |
142 | - >>> print match.describe() |
143 | + >>> print(match.describe()) |
144 | Matched 0 times |
145 | Here is some information that may be useful: |
146 | 0 matches for "bold rocks" in the document. |
147 | @@ -218,7 +219,7 @@ |
148 | Checking the number of times a pattern is matched |
149 | ------------------------------------------------- |
150 | |
151 | -Remember how findAll returned a list, and we just assumed that it only found |
152 | +Remember how find_all returned a list, and we just assumed that it only found |
153 | one tag in the example? Well, the matchers allow you to not just assume that, |
154 | they allow you to assert that. That means that you can assert that |
155 | a particular tag only occurs once by passing |
156 | @@ -232,10 +233,10 @@ |
157 | >>> html_matcher = soupmatchers.HTMLContains(tag_matcher) |
158 | >>> content = '<a href="https://launchpad.net/testtools"></a>' |
159 | >>> match = html_matcher.match(content) |
160 | - >>> print match |
161 | + >>> print(match) |
162 | None |
163 | >>> match = html_matcher.match(content * 2) |
164 | - >>> print match.describe() |
165 | + >>> print(match.describe()) |
166 | Matched 2 times |
167 | The matches were: |
168 | <a href="https://launchpad.net/testtools"></a> |
169 | @@ -251,7 +252,7 @@ |
170 | >>> html_matcher = soupmatchers.HTMLContains(tag_matcher) |
171 | >>> content = '<a href="https://launchpad.net/testtools"></a>' |
172 | >>> match = html_matcher.match(content) |
173 | - >>> print match.describe() |
174 | + >>> print(match.describe()) |
175 | Matched 1 time |
176 | The match was: |
177 | <a href="https://launchpad.net/testtools"></a> |
178 | @@ -275,9 +276,9 @@ |
179 | |
180 | >>> matcher = soupmatchers.HTMLContains(soupmatchers.Tag("bold", "b")) |
181 | >>> mismatch = matcher.match("<image></image>") |
182 | - >>> print mismatch.get_details().keys() |
183 | + >>> print(list(mismatch.get_details().keys())) |
184 | ['html'] |
185 | - >>> print ''.join(list(mismatch.get_details()["html"].iter_bytes())) |
186 | + >>> print(''.join(list(mismatch.get_details()["html"].iter_text()))) |
187 | <image></image> |
188 | |
189 | If you use assertThat then it will automatically call addDetails with this |
190 | @@ -294,7 +295,7 @@ |
191 | >>> matcher = soupmatchers.HTMLContains(soupmatchers.Tag( |
192 | ... "no bold", "b", count=0)) |
193 | >>> mismatch = matcher.match("<b>rocks</b>") |
194 | - >>> print mismatch.describe() |
195 | + >>> print(mismatch.describe()) |
196 | Matched 1 time |
197 | The match was: |
198 | <b>rocks</b> |
199 | @@ -309,7 +310,7 @@ |
200 | ... "class": "awesome"})) |
201 | >>> mismatch = matcher.match( |
202 | ... "<a href='https://launchpad.net/testtools'></a>") |
203 | - >>> print mismatch.describe() |
204 | + >>> print(mismatch.describe()) |
205 | Matched 0 times |
206 | Here is some information that may be useful: |
207 | 1 matches for "testtools link" when attribute class="awesome" is not a |
208 | @@ -319,7 +320,7 @@ |
209 | ... soupmatchers.Tag("bold rocks", "b", text="rocks")) |
210 | >>> mismatch = matcher.match( |
211 | ... "<b>is awesome</b>") |
212 | - >>> print mismatch.describe() |
213 | + >>> print(mismatch.describe()) |
214 | Matched 0 times |
215 | Here is some information that may be useful: |
216 | 1 matches for "bold rocks" when text="rocks" is not a requirement. |
217 | @@ -342,11 +343,11 @@ |
218 | >>> body_matcher = soupmatchers.Tag("the body", "body") |
219 | >>> matcher = soupmatchers.HTMLContains( |
220 | ... soupmatchers.Within(body_matcher, child_matcher)) |
221 | - >>> print matcher |
222 | + >>> print(matcher) |
223 | HTML contains [Tag("bold rocks", <b ...>rocks ...</b>) |
224 | within Tag("the body", <body ...>...</body>)] |
225 | >>> mismatch = matcher.match("<b>rocks</b><body></body>") |
226 | - >>> print mismatch.describe() |
227 | + >>> print(mismatch.describe()) |
228 | Matched 0 times |
229 | Here is some information that may be useful: |
230 | 1 matches for "bold rocks" in the document. |
231 | |
232 | === modified file 'setup.py' |
233 | --- setup.py 2012-02-08 00:44:34 +0000 |
234 | +++ setup.py 2017-10-20 22:33:14 +0000 |
235 | @@ -17,5 +17,15 @@ |
236 | to your TestCase hierarchy because it makes use of testtools |
237 | Matchers.'''), |
238 | setup_requires=['setuptools'], |
239 | - install_requires=['testtools>0.9.3', 'BeautifulSoup'], |
240 | + install_requires=['testtools>0.9.3', 'beautifulsoup4'], |
241 | + classifiers=[ |
242 | + "Development Status :: 5 - Production/Stable", |
243 | + "Intended Audience :: Developers", |
244 | + "License :: OSI Approved :: Eclipse Public License 1.0 (EPL-1.0)", |
245 | + "Programming Language :: Python", |
246 | + "Programming Language :: Python :: 2", |
247 | + "Programming Language :: Python :: 3", |
248 | + "Topic :: Text Processing :: Markup :: HTML", |
249 | + "Topic :: Software Development :: Testing", |
250 | + ], |
251 | ) |
252 | |
253 | === modified file 'soupmatchers/__init__.py' |
254 | --- soupmatchers/__init__.py 2012-02-08 00:43:44 +0000 |
255 | +++ soupmatchers/__init__.py 2017-10-20 22:33:14 +0000 |
256 | @@ -23,7 +23,7 @@ |
257 | See the README for more information. |
258 | """ |
259 | |
260 | -import BeautifulSoup |
261 | +import bs4 |
262 | |
263 | from testtools import matchers |
264 | from testtools.content import Content |
265 | @@ -47,7 +47,7 @@ |
266 | if self.html is not None: |
267 | return { |
268 | "html": Content(ContentType("text", "html"), |
269 | - lambda: self.html) |
270 | + lambda: [self.html.encode("UTF-8")]) |
271 | } |
272 | return {} |
273 | |
274 | @@ -125,7 +125,7 @@ |
275 | |
276 | def match(self, content): |
277 | if len(self.matchers) > 0: |
278 | - parsed_content = BeautifulSoup.BeautifulSoup(content) |
279 | + parsed_content = bs4.BeautifulSoup(content, "html.parser") |
280 | for matcher in self.matchers: |
281 | match = matcher.match(parsed_content) |
282 | if match is not None: |
283 | @@ -171,7 +171,8 @@ |
284 | def get_details(self): |
285 | details = { |
286 | "html": Content( |
287 | - ContentType("text", "html"), lambda: [str(self.content)]) |
288 | + ContentType("text", "html"), |
289 | + lambda: [str(self.content).encode("UTF-8")]) |
290 | } |
291 | return details |
292 | |
293 | @@ -185,6 +186,10 @@ |
294 | or self.content != other.content |
295 | or self.matches != other.matches) |
296 | |
297 | + def __repr__(self): |
298 | + return "<soupmatchers.TagMismatch object at %x attributes=%r>" % ( |
299 | + id(self), self.__dict__) |
300 | + |
301 | |
302 | class DocumentPart(matchers.Matcher): |
303 | |
304 | @@ -266,8 +271,8 @@ |
305 | is a key in the dict, or True, which matches any tag name. |
306 | |
307 | It is also possible to pass a callable as a tag name. The callable |
308 | - should take a BeautifulSoup.Tag object as the argument and |
309 | - return a boolean, with True indicating that the tag should match. |
310 | + should take a bs4.Tag object as the argument and return a boolean, |
311 | + with True indicating that the tag should match. |
312 | |
313 | attrs is a dict that defines what attributes the tag should have. |
314 | The keys are the names of attributes and the values define the |
315 | @@ -324,12 +329,12 @@ |
316 | def _check_text(self, candidates, text): |
317 | if len(candidates) > 0 and text is not _not_passed: |
318 | for candidate in candidates[:]: |
319 | - texts = candidate.findAll(text=text) |
320 | + texts = candidate.find_all(text=text) |
321 | if len(texts) < 1: |
322 | candidates.remove(candidate) |
323 | |
324 | def _get_matches(self, html, attrs, text): |
325 | - candidates = list(html.findAll(self.tag_type, attrs=attrs)) |
326 | + candidates = list(html.find_all(self.tag_type, attrs=attrs)) |
327 | self._check_text(candidates, text) |
328 | return candidates |
329 | |
330 | |
331 | === modified file 'soupmatchers/tests/__init__.py' |
332 | --- soupmatchers/tests/__init__.py 2012-02-08 00:36:21 +0000 |
333 | +++ soupmatchers/tests/__init__.py 2017-10-20 22:33:14 +0000 |
334 | @@ -1,19 +1,22 @@ |
335 | +from __future__ import print_function |
336 | + |
337 | + |
338 | def load_tests(loader, standard_tests, pattern): |
339 | import doctest |
340 | import os |
341 | import sys |
342 | - import unittest |
343 | - suite = unittest.TestSuite() |
344 | - loader = unittest.TestLoader() |
345 | - suite.addTest(loader.loadTestsFromName(__name__)) |
346 | + |
347 | + this_dir = os.path.dirname(__file__) |
348 | + standard_tests.addTests( |
349 | + loader.discover(start_dir=this_dir, pattern=pattern)) |
350 | source_readme_path = os.path.join( |
351 | - os.path.dirname(os.path.dirname(os.path.dirname(__file__))), "README") |
352 | + os.path.dirname(os.path.dirname(this_dir)), "README") |
353 | if os.path.exists(source_readme_path): |
354 | - suite.addTest( |
355 | - doctest.DocFileTest(os.path.relpath( |
356 | - source_readme_path, os.path.dirname(__file__)), |
357 | - optionflags=doctest.NORMALIZE_WHITESPACE)) |
358 | + standard_tests.addTest( |
359 | + doctest.DocFileTest(os.path.relpath(source_readme_path, this_dir), |
360 | + optionflags=doctest.NORMALIZE_WHITESPACE, |
361 | + globs={'print_function': print_function})) |
362 | else: |
363 | sys.stderr.write("Warning: not testing README as it can't be found") |
364 | - return suite |
365 | + return standard_tests |
366 | |
367 | |
368 | === modified file 'soupmatchers/tests/test_matchers.py' |
369 | --- soupmatchers/tests/test_matchers.py 2012-02-08 00:43:44 +0000 |
370 | +++ soupmatchers/tests/test_matchers.py 2017-10-20 22:33:14 +0000 |
371 | @@ -2,7 +2,7 @@ |
372 | |
373 | import re |
374 | |
375 | -import BeautifulSoup |
376 | +import bs4 |
377 | |
378 | from testtools import ( |
379 | matchers as testtools_matchers, |
380 | @@ -53,7 +53,8 @@ |
381 | html = "<image></image>" |
382 | mismatch = matchers.StatusCodeMismatch(200, 404, html=html) |
383 | self.assertEqual( |
384 | - {"html": Content(ContentType("text", "html"), lambda: html)}, |
385 | + {"html": Content( |
386 | + ContentType("text", "html"), lambda: [html.encode("UTF-8")])}, |
387 | mismatch.get_details()) |
388 | |
389 | def get_equal_mismatches(self): |
390 | @@ -129,7 +130,9 @@ |
391 | response = TestResponse(status_code=404, content=content) |
392 | match = matchers.ResponseHas(status_code=200).match(response) |
393 | self.assertEquals( |
394 | - {"html": Content(ContentType("text", "html"), lambda: content)}, |
395 | + {"html": Content( |
396 | + ContentType("text", "html"), |
397 | + lambda: [content.encode("UTF-8")])}, |
398 | match.get_details()) |
399 | |
400 | def test_response_has_content_matches(self): |
401 | @@ -195,7 +198,7 @@ |
402 | html_matcher = matchers.HTMLContains(anchor_matcher) |
403 | content = "<image></image>" |
404 | match = html_matcher.match(content) |
405 | - parsed_content = BeautifulSoup.BeautifulSoup(content) |
406 | + parsed_content = bs4.BeautifulSoup(content, "html.parser") |
407 | self.assertEquals( |
408 | matchers.TagMismatch(anchor_matcher, parsed_content, []), match) |
409 | |
410 | @@ -203,7 +206,7 @@ |
411 | html_matcher = matchers.HTMLContains(anchor_matcher, image_matcher) |
412 | content = "<image></image>" |
413 | match = html_matcher.match(content) |
414 | - parsed_content = BeautifulSoup.BeautifulSoup(content) |
415 | + parsed_content = bs4.BeautifulSoup(content, "html.parser") |
416 | self.assertEquals( |
417 | matchers.TagMismatch(anchor_matcher, parsed_content, []), match) |
418 | |
419 | @@ -211,7 +214,7 @@ |
420 | html_matcher = matchers.HTMLContains(anchor_matcher, image_matcher) |
421 | content = "<b></b>" |
422 | match = html_matcher.match(content) |
423 | - parsed_content = BeautifulSoup.BeautifulSoup(content) |
424 | + parsed_content = bs4.BeautifulSoup(content, "html.parser") |
425 | self.assertEquals( |
426 | matchers.TagMismatch(anchor_matcher, parsed_content, []), match) |
427 | |
428 | @@ -229,13 +232,13 @@ |
429 | |
430 | def test_describe_zero(self): |
431 | content = "<image></image>" |
432 | - parsed_content = BeautifulSoup.BeautifulSoup(content) |
433 | + parsed_content = bs4.BeautifulSoup(content, "html.parser") |
434 | mismatch = matchers.TagMismatch(anchor_matcher, parsed_content, []) |
435 | self.assertEqual("Matched 0 times", mismatch.describe()) |
436 | |
437 | def test_describe_one(self): |
438 | content = "<image></image>" |
439 | - parsed_content = BeautifulSoup.BeautifulSoup(content) |
440 | + parsed_content = bs4.BeautifulSoup(content, "html.parser") |
441 | mismatch = matchers.TagMismatch( |
442 | anchor_matcher, parsed_content, ["<a></a>"]) |
443 | self.assertEqual( |
444 | @@ -244,7 +247,7 @@ |
445 | |
446 | def test_describe_two(self): |
447 | content = "<image></image>" |
448 | - parsed_content = BeautifulSoup.BeautifulSoup(content) |
449 | + parsed_content = bs4.BeautifulSoup(content, "html.parser") |
450 | mismatch = matchers.TagMismatch( |
451 | anchor_matcher, parsed_content, ["<a></a>", "<b></b>"]) |
452 | self.assertEqual( |
453 | @@ -253,15 +256,17 @@ |
454 | |
455 | def test_get_details(self): |
456 | content = "<image></image>" |
457 | - parsed_content = BeautifulSoup.BeautifulSoup(content) |
458 | + parsed_content = bs4.BeautifulSoup(content, "html.parser") |
459 | mismatch = matchers.TagMismatch(anchor_matcher, parsed_content, []) |
460 | self.assertEqual( |
461 | - {"html": Content(ContentType("text", "html"), |
462 | - lambda: str(parsed_content))}, mismatch.get_details()) |
463 | + {"html": Content( |
464 | + ContentType("text", "html"), |
465 | + lambda: [str(parsed_content).encode("UTF-8")])}, |
466 | + mismatch.get_details()) |
467 | |
468 | def test_eq_equal(self): |
469 | content = "<image></image>" |
470 | - parsed_content = BeautifulSoup.BeautifulSoup(content) |
471 | + parsed_content = bs4.BeautifulSoup(content, "html.parser") |
472 | matches = ["<a></a>"] |
473 | mismatch1 = matchers.TagMismatch( |
474 | anchor_matcher, parsed_content, matches) |
475 | @@ -271,7 +276,7 @@ |
476 | |
477 | def test_eq_different_tag(self): |
478 | content = "<image></image>" |
479 | - parsed_content = BeautifulSoup.BeautifulSoup(content) |
480 | + parsed_content = bs4.BeautifulSoup(content, "html.parser") |
481 | matches = ["<a></a>"] |
482 | mismatch1 = matchers.TagMismatch( |
483 | anchor_matcher, parsed_content, matches) |
484 | @@ -281,9 +286,9 @@ |
485 | |
486 | def test_eq_different_content(self): |
487 | content1 = "<image></image>" |
488 | - parsed_content1 = BeautifulSoup.BeautifulSoup(content1) |
489 | + parsed_content1 = bs4.BeautifulSoup(content1, "html.parser") |
490 | content2 = "<div></div>" |
491 | - parsed_content2 = BeautifulSoup.BeautifulSoup(content2) |
492 | + parsed_content2 = bs4.BeautifulSoup(content2, "html.parser") |
493 | matches = ["<a></a>"] |
494 | mismatch1 = matchers.TagMismatch( |
495 | anchor_matcher, parsed_content1, matches) |
496 | @@ -293,7 +298,7 @@ |
497 | |
498 | def test_eq_different_matches(self): |
499 | content = "<image></image>" |
500 | - parsed_content = BeautifulSoup.BeautifulSoup(content) |
501 | + parsed_content = bs4.BeautifulSoup(content, "html.parser") |
502 | matches1 = ["<a></a>"] |
503 | matches2 = ["<b></b>"] |
504 | mismatch1 = matchers.TagMismatch( |
505 | @@ -344,7 +349,7 @@ |
506 | str(tag_matcher)) |
507 | |
508 | def get_match(self, matcher, content): |
509 | - parsed_content = BeautifulSoup.BeautifulSoup(content) |
510 | + parsed_content = bs4.BeautifulSoup(content, "html.parser") |
511 | return matcher.match(parsed_content), parsed_content |
512 | |
513 | def test_matches_one_instance(self): |
514 | @@ -524,14 +529,14 @@ |
515 | |
516 | def test_get_extra_info_no_close_matches(self): |
517 | content = "<image></image>" |
518 | - parsed_content = BeautifulSoup.BeautifulSoup(content) |
519 | + parsed_content = bs4.BeautifulSoup(content, "html.parser") |
520 | extra_info = anchor_matcher.get_extra_info([parsed_content], "") |
521 | self.assertEqual([], extra_info) |
522 | |
523 | def test_get_extra_info_vary_attributes(self): |
524 | tag_matcher = matchers.Tag("foo link", "a", attrs={"href": "foo"}) |
525 | content = "<a></a>" |
526 | - parsed_content = BeautifulSoup.BeautifulSoup(content) |
527 | + parsed_content = bs4.BeautifulSoup(content, "html.parser") |
528 | close_matches = tag_matcher.get_extra_info([parsed_content], "") |
529 | self.assertEqual( |
530 | ['1 matches for "foo link" when attribute href="foo" ' |
531 | @@ -541,7 +546,7 @@ |
532 | tag_matcher = matchers.Tag( |
533 | "foo bar link", "a", attrs={"href": "foo", "class": "bar"}) |
534 | content = "<a href='foo'></a><a class='bar'></a>" |
535 | - parsed_content = BeautifulSoup.BeautifulSoup(content) |
536 | + parsed_content = bs4.BeautifulSoup(content, "html.parser") |
537 | extra_info = tag_matcher.get_extra_info([parsed_content], "") |
538 | self.assertEqual( |
539 | ['1 matches for "foo bar link" when attribute class="bar" ' |
540 | @@ -553,7 +558,7 @@ |
541 | def test_get_extra_info_vary_text(self): |
542 | tag_matcher = matchers.Tag("bold rocks", "b", text="rocks") |
543 | content = "<b>is awesome</b>" |
544 | - parsed_content = BeautifulSoup.BeautifulSoup(content) |
545 | + parsed_content = bs4.BeautifulSoup(content, "html.parser") |
546 | extra_info = tag_matcher.get_extra_info([parsed_content], "") |
547 | self.assertEqual( |
548 | ['1 matches for \"bold rocks\" when text="rocks" is not a ' |
549 | @@ -563,16 +568,16 @@ |
550 | tag_matcher = matchers.Tag( |
551 | "no bold rocks", "b", text="rocks", count=0) |
552 | content = "<b>is awesome</b>" |
553 | - parsed_content = BeautifulSoup.BeautifulSoup(content) |
554 | + parsed_content = bs4.BeautifulSoup(content, "html.parser") |
555 | extra_info = tag_matcher.get_extra_info([parsed_content], "") |
556 | self.assertEqual([], extra_info) |
557 | |
558 | def test_get_extra_info_multiple_roots(self): |
559 | tag_matcher = matchers.Tag("bold rocks", "b", text="rocks") |
560 | content1 = "<b>is awesome</b>" |
561 | - parsed_content1 = BeautifulSoup.BeautifulSoup(content1) |
562 | + parsed_content1 = bs4.BeautifulSoup(content1, "html.parser") |
563 | content2 = "<b>is awesome</b>" |
564 | - parsed_content2 = BeautifulSoup.BeautifulSoup(content2) |
565 | + parsed_content2 = bs4.BeautifulSoup(content2, "html.parser") |
566 | extra_info = tag_matcher.get_extra_info( |
567 | [parsed_content1, parsed_content2], "") |
568 | self.assertEqual( |
569 | @@ -582,7 +587,7 @@ |
570 | def test_get_extra_info_identifier_suffix(self): |
571 | tag_matcher = matchers.Tag("bold rocks", "b", text="rocks") |
572 | content = "<b>is awesome</b>" |
573 | - parsed_content = BeautifulSoup.BeautifulSoup(content) |
574 | + parsed_content = bs4.BeautifulSoup(content, "html.parser") |
575 | extra_info = tag_matcher.get_extra_info([parsed_content], |
576 | " within foo") |
577 | self.assertEqual( |
578 | @@ -644,7 +649,7 @@ |
579 | def test_match_outer_not_matched(self): |
580 | within_matcher = matchers.Within(anchor_matcher, image_matcher) |
581 | content = "<image></image>" |
582 | - parsed_content = BeautifulSoup.BeautifulSoup(content) |
583 | + parsed_content = bs4.BeautifulSoup(content, "html.parser") |
584 | mismatch = within_matcher.match(parsed_content) |
585 | self.assertEqual( |
586 | matchers.TagMismatch(within_matcher, parsed_content, []), |
587 | @@ -653,7 +658,7 @@ |
588 | def test_match_mismatch(self): |
589 | within_matcher = matchers.Within(anchor_matcher, image_matcher) |
590 | content = "<a></a><image></image>" |
591 | - parsed_content = BeautifulSoup.BeautifulSoup(content) |
592 | + parsed_content = bs4.BeautifulSoup(content, "html.parser") |
593 | mismatch = within_matcher.match(parsed_content) |
594 | self.assertEqual( |
595 | matchers.TagMismatch(within_matcher, parsed_content, []), |
596 | @@ -662,7 +667,7 @@ |
597 | def test_match_match_in_one(self): |
598 | within_matcher = matchers.Within(anchor_matcher, image_matcher) |
599 | content = "<a><image></image></a>" |
600 | - parsed_content = BeautifulSoup.BeautifulSoup(content) |
601 | + parsed_content = bs4.BeautifulSoup(content, "html.parser") |
602 | mismatch = within_matcher.match(parsed_content) |
603 | self.assertEqual(None, mismatch) |
604 | |
605 | @@ -676,7 +681,7 @@ |
606 | content = '<div><div></div></div>' |
607 | child_matcher = matchers.Tag("div", 'div') |
608 | tag_matcher = matchers.Within(child_matcher, child_matcher) |
609 | - parsed_content = BeautifulSoup.BeautifulSoup(content) |
610 | + parsed_content = bs4.BeautifulSoup(content, "html.parser") |
611 | match = tag_matcher.match(parsed_content) |
612 | self.assertEqual(None, match) |
613 | |
614 | @@ -690,7 +695,7 @@ |
615 | content = '<div><span></span></div>' |
616 | child_matcher = matchers.Tag("div", 'div') |
617 | tag_matcher = matchers.Within(child_matcher, child_matcher) |
618 | - parsed_content = BeautifulSoup.BeautifulSoup(content) |
619 | + parsed_content = bs4.BeautifulSoup(content, "html.parser") |
620 | match = tag_matcher.match(parsed_content) |
621 | self.assertEqual( |
622 | matchers.TagMismatch(tag_matcher, parsed_content, []), |
623 | @@ -699,7 +704,7 @@ |
624 | def test_get_extra_info_both_missing(self): |
625 | content = '' |
626 | tag_matcher = matchers.Within(anchor_matcher, image_matcher) |
627 | - parsed_content = BeautifulSoup.BeautifulSoup(content) |
628 | + parsed_content = bs4.BeautifulSoup(content, "html.parser") |
629 | extra_info = tag_matcher.get_extra_info([parsed_content], "") |
630 | self.assertEqual( |
631 | ['0 matches for "%s" in the document.' |
632 | @@ -711,7 +716,7 @@ |
633 | def test_get_extra_info_inner_missing(self): |
634 | content = '<a></a>' |
635 | tag_matcher = matchers.Within(anchor_matcher, image_matcher) |
636 | - parsed_content = BeautifulSoup.BeautifulSoup(content) |
637 | + parsed_content = bs4.BeautifulSoup(content, "html.parser") |
638 | extra_info = tag_matcher.get_extra_info([parsed_content], "") |
639 | self.assertEqual( |
640 | ['0 matches for "%s" in the document.' |
641 | @@ -723,7 +728,7 @@ |
642 | def test_get_extra_info_neither_missing(self): |
643 | content = '<a></a><image></image>' |
644 | tag_matcher = matchers.Within(anchor_matcher, image_matcher) |
645 | - parsed_content = BeautifulSoup.BeautifulSoup(content) |
646 | + parsed_content = bs4.BeautifulSoup(content, "html.parser") |
647 | extra_info = tag_matcher.get_extra_info([parsed_content], "") |
648 | self.assertEqual( |
649 | ['1 matches for "%s" in the document.' |
650 | @@ -736,7 +741,7 @@ |
651 | content = '<a><b>is awesome</b></a>' |
652 | bold_matcher = matchers.Tag("bold rocks", "b", text="rocks") |
653 | tag_matcher = matchers.Within(anchor_matcher, bold_matcher) |
654 | - parsed_content = BeautifulSoup.BeautifulSoup(content) |
655 | + parsed_content = bs4.BeautifulSoup(content, "html.parser") |
656 | extra_info = tag_matcher.get_extra_info([parsed_content], "") |
657 | self.assertEqual( |
658 | ['0 matches for "bold rocks" in the document.', |
659 | @@ -749,7 +754,7 @@ |
660 | def test_get_extra_info_multiple_parts(self): |
661 | content = '<a></a><image></image>' |
662 | tag_matcher = matchers.Within(anchor_matcher, image_matcher) |
663 | - parsed_content = BeautifulSoup.BeautifulSoup(content) |
664 | + parsed_content = bs4.BeautifulSoup(content, "html.parser") |
665 | extra_info = tag_matcher.get_extra_info( |
666 | [parsed_content, parsed_content], "") |
667 | self.assertEqual( |
668 | @@ -762,7 +767,7 @@ |
669 | def test_get_extra_info_with_suffix(self): |
670 | content = '<a></a><image></image>' |
671 | tag_matcher = matchers.Within(anchor_matcher, image_matcher) |
672 | - parsed_content = BeautifulSoup.BeautifulSoup(content) |
673 | + parsed_content = bs4.BeautifulSoup(content, "html.parser") |
674 | extra_info = tag_matcher.get_extra_info([parsed_content], |
675 | " within foo") |
676 | self.assertEqual( |
677 | @@ -775,7 +780,7 @@ |
678 | def test_get_matches(self): |
679 | within_matcher = matchers.Within(anchor_matcher, image_matcher) |
680 | content = "<a><image></image></a>" |
681 | - parsed_content = BeautifulSoup.BeautifulSoup(content) |
682 | + parsed_content = bs4.BeautifulSoup(content, "html.parser") |
683 | matches = within_matcher.get_matches(parsed_content) |
684 | self.assertEqual([content], [str(a) for a in matches]) |
685 | |
686 | @@ -910,7 +915,7 @@ |
687 | def test_get_matches_outer_missing(self): |
688 | select_matcher = self.get_select_matcher_with_single_choice() |
689 | content = '<option value="choice1">Choice 1</option>' |
690 | - parsed_content = BeautifulSoup.BeautifulSoup(content) |
691 | + parsed_content = bs4.BeautifulSoup(content, "html.parser") |
692 | matches = select_matcher.get_matches(parsed_content) |
693 | self.assertEqual([], matches) |
694 | |
695 | @@ -919,7 +924,7 @@ |
696 | choices={"choice1": "Choice 1", "choice2": "Choice 2"}) |
697 | content = ('<select name="foo"><option value="choice1">' |
698 | 'Choice 1</option></select>') |
699 | - parsed_content = BeautifulSoup.BeautifulSoup(content) |
700 | + parsed_content = bs4.BeautifulSoup(content, "html.parser") |
701 | matches = select_matcher.get_matches(parsed_content) |
702 | self.assertEqual([], matches) |
703 | |
704 | @@ -929,7 +934,7 @@ |
705 | content = ('<select name="foo"><option value="choice1">' |
706 | 'Choice 1</option><option value="choice2">' |
707 | 'Choice 2</option></select>') |
708 | - parsed_content = BeautifulSoup.BeautifulSoup(content) |
709 | + parsed_content = bs4.BeautifulSoup(content, "html.parser") |
710 | matches = select_matcher.get_matches(parsed_content) |
711 | self.assertEqual([content], [str(a) for a in matches]) |
712 | |
713 | @@ -941,14 +946,14 @@ |
714 | 'Choice 2</option></select>') |
715 | unmatched_content = ('<select name="foo"><option value="choice1">' |
716 | 'Choice 1</option></select>') |
717 | - parsed_content = BeautifulSoup.BeautifulSoup( |
718 | - content+unmatched_content) |
719 | + parsed_content = bs4.BeautifulSoup( |
720 | + content+unmatched_content, "html.parser") |
721 | matches = select_matcher.get_matches(parsed_content) |
722 | self.assertEqual([content], [str(a) for a in matches]) |
723 | |
724 | def test_match_twice(self): |
725 | select_matcher = self.get_select_matcher_with_single_choice() |
726 | content = '<select><option value="choice1">Choice 1</option></select>' |
727 | - parsed_content = BeautifulSoup.BeautifulSoup(content*2) |
728 | + parsed_content = bs4.BeautifulSoup(content*2, "html.parser") |
729 | matches = select_matcher.get_matches(parsed_content) |
730 | self.assertEqual([content]*2, [str(a) for a in matches]) |