Merge lp:~jerith/ibid/split_utils into lp:~ibid-core/ibid/old-trunk-1.6

Proposed by Jeremy Thurgood
Status: Superseded
Proposed branch: lp:~jerith/ibid/split_utils
Merge into: lp:~ibid-core/ibid/old-trunk-1.6
Diff against target: 173 lines (+52/-50)
6 files modified
ibid/plugins/feeds.py (+2/-1)
ibid/plugins/icecast.py (+2/-1)
ibid/plugins/lookup.py (+3/-2)
ibid/plugins/url.py (+1/-1)
ibid/utils/__init__.py (+0/-45)
ibid/utils/html.py (+44/-0)
To merge this branch: bzr merge lp:~jerith/ibid/split_utils
Reviewer | Review Type | Date Requested | Status
Michael Gorven | | | Needs Fixing
Jonathan Hitchcock | | | Pending
Review via email: mp+16618@code.launchpad.net

This proposal supersedes a proposal from 2009-12-28.

This proposal has been superseded by a proposal from 2009-12-28.

To post a comment you must log in.
Revision history for this message
Jeremy Thurgood (jerith) wrote : Posted in a previous version of this proposal

A small amount of reorganisation makes html5lib and BeautifulSoup optional dependencies.

Revision history for this message
Jonathan Hitchcock (vhata) wrote : Posted in a previous version of this proposal

ImportError: No module named ibid.utils.html

review: Needs Fixing
Revision history for this message
Michael Gorven (mgorven) wrote : Posted in a previous version of this proposal

You forgot to add ibid/utils/html.py.
 review needs_fixing

review: Needs Fixing
Revision history for this message
Michael Gorven (mgorven) wrote :

  File "/home/mgorven/bzr/ibid/jerith/split_utils/ibid/utils/html.py", line 7, in get_html_parse_tree
    req = urllib2.Request(url, data, headers)
NameError: global name 'urllib2' is not defined

 review needs_fixing

review: Needs Fixing
lp:~jerith/ibid/split_utils updated
810. By Jeremy Thurgood

Fixed broken imports.

Unmerged revisions

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'ibid/plugins/feeds.py'
2--- ibid/plugins/feeds.py 2009-12-10 09:56:44 +0000
3+++ ibid/plugins/feeds.py 2009-12-28 13:53:13 +0000
4@@ -12,7 +12,8 @@
5 from ibid.config import IntOption
6 from ibid.plugins import Processor, match, authorise, run_every
7 from ibid.models import Base, VersionedSchema
8-from ibid.utils import cacheable_download, get_html_parse_tree, human_join
9+from ibid.utils import cacheable_download, human_join
10+from ibid.utils.html import get_html_parse_tree
11
12 help = {'feeds': u'Displays articles from RSS and Atom feeds'}
13
14
15=== modified file 'ibid/plugins/icecast.py'
16--- ibid/plugins/icecast.py 2009-11-30 15:29:43 +0000
17+++ ibid/plugins/icecast.py 2009-12-28 13:53:13 +0000
18@@ -3,7 +3,8 @@
19
20 from ibid.config import DictOption, IntOption
21 from ibid.plugins import Processor, match, run_every
22-from ibid.utils import get_html_parse_tree, human_join
23+from ibid.utils import human_join
24+from ibid.utils.html import get_html_parse_tree
25
26 log = logging.getLogger('plugins.icecast')
27
28
29=== modified file 'ibid/plugins/lookup.py'
30--- ibid/plugins/lookup.py 2009-12-17 16:22:53 +0000
31+++ ibid/plugins/lookup.py 2009-12-28 13:53:13 +0000
32@@ -15,8 +15,9 @@
33 from ibid.compat import defaultdict, dt_strptime, ElementTree
34 from ibid.config import Option, BoolOption, DictOption
35 from ibid.plugins import Processor, match, handler
36-from ibid.utils import ago, decode_htmlentities, get_html_parse_tree, \
37- cacheable_download, json_webservice, human_join, plural
38+from ibid.utils import ago, decode_htmlentities, cacheable_download, \
39+ json_webservice, human_join, plural
40+from ibid.utils.html import get_html_parse_tree
41
42 log = logging.getLogger('plugins.lookup')
43
44
45=== modified file 'ibid/plugins/url.py'
46--- ibid/plugins/url.py 2009-10-18 16:49:32 +0000
47+++ ibid/plugins/url.py 2009-12-28 13:53:13 +0000
48@@ -13,7 +13,7 @@
49 from ibid.plugins import Processor, match, handler
50 from ibid.config import Option, ListOption
51 from ibid.models import Base, VersionedSchema
52-from ibid.utils import get_html_parse_tree
53+from ibid.utils.html import get_html_parse_tree
54
55 help = {'url': u'Captures URLs seen in channel to database and/or to delicious, and shortens and lengthens URLs'}
56
57
58=== added directory 'ibid/utils'
59=== renamed file 'ibid/utils.py' => 'ibid/utils/__init__.py'
60--- ibid/utils.py 2009-12-20 22:09:29 +0000
61+++ ibid/utils/__init__.py 2009-12-28 13:53:13 +0000
62@@ -1,4 +1,3 @@
63-import cgi
64 from gzip import GzipFile
65 from htmlentitydefs import name2codepoint
66 import os
67@@ -12,8 +11,6 @@
68 import zlib
69
70 from dateutil.tz import tzlocal, tzutc
71-from html5lib import HTMLParser, treebuilders
72-from BeautifulSoup import BeautifulSoup
73
74 import ibid
75 from ibid.compat import defaultdict, ElementTree, json
76@@ -161,48 +158,6 @@
77 class ContentTypeException(Exception):
78 pass
79
80-def get_html_parse_tree(url, data=None, headers={}, treetype='beautifulsoup'):
81- "Request a URL, parse with html5lib, and return a parse tree from it"
82-
83- req = urllib2.Request(url, data, headers)
84- f = urllib2.urlopen(req)
85-
86- if f.info().gettype() not in ('text/html', 'application/xhtml+xml'):
87- f.close()
88- raise ContentTypeException("Content type isn't HTML, but " + f.info().gettype())
89-
90- data = f.read()
91- f.close()
92-
93- encoding = None
94- contentType = f.headers.get('content-type')
95- if contentType:
96- (mediaType, params) = cgi.parse_header(contentType)
97- encoding = params.get('charset')
98-
99- compression = f.headers.get('content-encoding')
100- if compression:
101- if compression.lower() == "deflate":
102- try:
103- data = zlib.decompress(data)
104- except zlib.error:
105- data = zlib.decompress(data, -zlib.MAX_WBITS)
106- elif compression.lower() == "gzip":
107- compressedstream = StringIO(data)
108- gzipper = GzipFile(fileobj=compressedstream)
109- data = gzipper.read()
110-
111- if treetype == "beautifulsoup":
112- return BeautifulSoup(data, convertEntities=BeautifulSoup.HTML_ENTITIES)
113- elif treetype == "etree":
114- treebuilder = treebuilders.getTreeBuilder("etree", ElementTree)
115- else:
116- treebuilder = treebuilders.getTreeBuilder(treetype)
117-
118- parser = HTMLParser(tree=treebuilder)
119-
120- return parser.parse(data, encoding = encoding)
121-
122 class JSONException(Exception):
123 pass
124
125
126=== added file 'ibid/utils/html.py'
127--- ibid/utils/html.py 1970-01-01 00:00:00 +0000
128+++ ibid/utils/html.py 2009-12-28 13:53:13 +0000
129@@ -0,0 +1,44 @@
130+from html5lib import HTMLParser, treebuilders
131+from BeautifulSoup import BeautifulSoup
132+
133+def get_html_parse_tree(url, data=None, headers={}, treetype='beautifulsoup'):
134+ "Request a URL, parse with html5lib, and return a parse tree from it"
135+
136+ req = urllib2.Request(url, data, headers)
137+ f = urllib2.urlopen(req)
138+
139+ if f.info().gettype() not in ('text/html', 'application/xhtml+xml'):
140+ f.close()
141+ raise ContentTypeException("Content type isn't HTML, but " + f.info().gettype())
142+
143+ data = f.read()
144+ f.close()
145+
146+ encoding = None
147+ contentType = f.headers.get('content-type')
148+ if contentType:
149+ (mediaType, params) = cgi.parse_header(contentType)
150+ encoding = params.get('charset')
151+
152+ compression = f.headers.get('content-encoding')
153+ if compression:
154+ if compression.lower() == "deflate":
155+ try:
156+ data = zlib.decompress(data)
157+ except zlib.error:
158+ data = zlib.decompress(data, -zlib.MAX_WBITS)
159+ elif compression.lower() == "gzip":
160+ compressedstream = StringIO(data)
161+ gzipper = GzipFile(fileobj=compressedstream)
162+ data = gzipper.read()
163+
164+ if treetype == "beautifulsoup":
165+ return BeautifulSoup(data, convertEntities=BeautifulSoup.HTML_ENTITIES)
166+ elif treetype == "etree":
167+ treebuilder = treebuilders.getTreeBuilder("etree", ElementTree)
168+ else:
169+ treebuilder = treebuilders.getTreeBuilder(treetype)
170+
171+ parser = HTMLParser(tree=treebuilder)
172+
173+ return parser.parse(data, encoding = encoding)

Subscribers

People subscribed via source and target branches