Merge lp:~raoul-snyman/openlp/biblefixes into lp:openlp

Proposed by Raoul Snyman
Status: Merged
Merged at revision: not available
Proposed branch: lp:~raoul-snyman/openlp/biblefixes
Merge into: lp:openlp
Diff against target: None lines
To merge this branch: bzr merge lp:~raoul-snyman/openlp/biblefixes
Reviewer Review Type Date Requested Status
Tim Bentley Approve
Review via email: mp+8345@code.launchpad.net
To post a comment you must log in.
Revision history for this message
Raoul Snyman (raoul-snyman) wrote :

Fixes issues that some of us were having.

Revision history for this message
Tim Bentley (trb143) wrote :

Nice one. Looks much better now!

review: Approve
lp:~raoul-snyman/openlp/biblefixes updated
483. By Tim Bentley

Remove unused stuff
Add more bits to ImageSlideController bar
Clean up ThemeManager
Add OOS saving

484. By Raoul Snyman

Fixed up a few logging things, a few bits of indentation, and resolved some conflicts from the last update.

485. By Raoul Snyman

Merged changes to Bible plugin into trunk.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
=== modified file 'openlp/plugins/bibles/lib/common.py'
--- openlp/plugins/bibles/lib/common.py 2009-06-20 19:11:17 +0000
+++ openlp/plugins/bibles/lib/common.py 2009-07-07 20:18:36 +0000
@@ -19,93 +19,140 @@
19import os.path19import os.path
20import sys20import sys
21import urllib221import urllib2
2222import chardet
23import logging23import logging
2424
25class SearchResults:25class SearchResults:
26 """
27 Encapsulate a set of search results. This is Bible-type independent.
28 """
26 def __init__(self, book, chapter, verselist):29 def __init__(self, book, chapter, verselist):
30 """
31 Create the search result object.
32
33 ``book``
34 The book of the Bible.
35
36 ``chapter``
37 The chapter of the book.
38
39 ``verselist``
40 The list of verses for this reading
41 """
27 self.book = book42 self.book = book
28 self.chapter = chapter43 self.chapter = chapter
29 self.verselist = verselist44 self.verselist = verselist
45
30 def get_verselist(self):46 def get_verselist(self):
47 """
48 Returns the list of verses.
49 """
31 return self.verselist50 return self.verselist
51
32 def get_book(self):52 def get_book(self):
53 """
54 Returns the book of the Bible.
55 """
33 return self.book56 return self.book
57
34 def get_chapter(self):58 def get_chapter(self):
59 """
60 Returns the chapter of the book.
61 """
35 return self.chapter62 return self.chapter
63
36 def has_verselist(self):64 def has_verselist(self):
37 if self.verselist == {}:65 """
38 return False66 Returns whether or not the verse list contains verses.
39 else:67 """
40 return True68 return len(self.verselist) > 0
4169
42class BibleCommon:70
71class BibleCommon(object):
72 """
73 A common ancestor for bible download sites.
74 """
43 global log75 global log
44 log = logging.getLogger(u'BibleCommon')76 log = logging.getLogger(u'BibleCommon')
45 log.info(u'BibleCommon')77 log.info(u'BibleCommon')
78
46 def __init__(self):79 def __init__(self):
47 """80 """
81 An empty constructor... not sure why I'm here.
48 """82 """
83 pass
84
49 def _get_web_text(self, urlstring, proxyurl):85 def _get_web_text(self, urlstring, proxyurl):
86 """
87 Get the HTML from the web page.
88
89 ``urlstring``
90 The URL of the page to open.
91
92 ``proxyurl``
93 The URL of a proxy server used to access the Internet.
94 """
50 log.debug(u'get_web_text %s %s', proxyurl, urlstring)95 log.debug(u'get_web_text %s %s', proxyurl, urlstring)
51 if not proxyurl == None:96 if proxyurl is not None:
52 proxy_support = urllib2.ProxyHandler({'http': self.proxyurl})97 proxy_support = urllib2.ProxyHandler({'http': self.proxyurl})
53 http_support = urllib2.HTTPHandler()98 http_support = urllib2.HTTPHandler()
54 opener= urllib2.build_opener(proxy_support, http_support)99 opener = urllib2.build_opener(proxy_support, http_support)
55 urllib2.install_opener(opener)100 urllib2.install_opener(opener)
56 xml_string = u''101 xml_string = u''
57 req = urllib2.Request(urlstring)102 req = urllib2.Request(urlstring)
58 req.add_header(u'User-Agent', 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)')103 req.add_header('User-Agent', 'Mozilla/4.0 (compatible; MSIE 5.5; Windows NT)')
59 try:104 try:
60 handle = urllib2.urlopen(req)105 handle = urllib2.urlopen(req)
61 xml_string = unicode(handle.read())106 html = handle.read()
107 details = chardet.detect(html)
108 xml_string = unicode(html, details['encoding'])
62 except IOError, e:109 except IOError, e:
63 if hasattr(e, u'reason'):110 if hasattr(e, u'reason'):
64 log.error(u'Reason : ')111 log.error(u'Reason : %s', e.reason)
65 log.error( e.reason)
66 return xml_string112 return xml_string
67113
68 def _clean_text(self, text):114 def _clean_text(self, text):
69 """115 """
70 Clean up text and remove extra characters116 Clean up text and remove extra characters after being downloaded from
71 after been downloaded from web117 the Internet.
118
119 ``text``
120 The text from the web page that needs to be cleaned up.
72 """121 """
73 #return text.rstrip()122 #return text.rstrip()
74 # Remove Headings from the Text123 # Remove Headings from the Text
75 i = text.find(u'<h')124 start_tag = text.find(u'<h')
76 while i > -1:125 while start_tag > -1:
77 j=text.find(u'</h', i)126 end_tag = text.find(u'</h', start_tag)
78 text = text[ : (i - 1)]+text[(j+4)]127 text = text[:(start_tag - 1)] + text[(end_tag + 4)]
79 i = text.find(u'<h')128 start_tag = text.find(u'<h')
80
81 # Remove Support References from the Text129 # Remove Support References from the Text
82 x = text.find(u'<sup>')130 start_tag = text.find(u'<sup>')
83 while x > -1:131 while start_tag > -1:
84 y = text.find(u'</sup>')132 end_tag = text.find(u'</sup>')
85 text= text[:x] + text[y + 6:len(text)]133 text = text[:start_tag] + text[end_tag + 6:len(text)]
86 x = text.find(u'<sup>')134 start_tag = text.find(u'<sup>')
87
88 # Static Clean ups135 # Static Clean ups
89 text= text.replace(u'\n', u'')136 text = text.replace(u'\n', u'')
90 text= text.replace(u'\r', u'')137 text = text.replace(u'\r', u'')
91 text= text.replace(u'&nbsp;', u'')138 text = text.replace(u'&nbsp;', u'')
92 text= text.replace(u'<P>', u'')139 text = text.replace(u'<P>', u'')
93 text= text.replace(u'<I>', u'')140 text = text.replace(u'<I>', u'')
94 text= text.replace(u'</I>', u'')141 text = text.replace(u'</I>', u'')
95 text= text.replace(u'<P />', u'')142 text = text.replace(u'<P />', u'')
96 text= text.replace(u'<p />', u'')143 text = text.replace(u'<p />', u'')
97 text= text.replace(u'</P>', u'')144 text = text.replace(u'</P>', u'')
98 text= text.replace(u'<BR>', u'')145 text = text.replace(u'<BR>', u'')
99 text= text.replace(u'<BR />', u'')146 text = text.replace(u'<BR />', u'')
100 #text= text.replace(chr(189), u'1/2');print "l"147 #text = text.replace(chr(189), u'1/2');print "l"
101 text= text.replace(u'&quot;', "'")148 text = text.replace(u'&quot;', u'\"')
102 text= text.replace(u'&apos;', "'")149 text = text.replace(u'&apos;', u'\'')
103150 # Remove some other tags
104 i = text.find(u'<')151 start_tag = text.find(u'<')
105 while i > -1 :152 while start_tag > -1 :
106 j = text.find(u'>', i)153 end_tag = text.find(u'>', start_tag)
107 text= text[:i] + text[j+1:]154 text = text[:start_tag] + text[end_tag + 1:]
108 i = text.find(u'<')155 start_tag = text.find(u'<')
109156 text = text.replace(u'>', u'')
110 text= text.replace(u'>', u'')
111 return text.rstrip()157 return text.rstrip()
158