Merge lp:~tomasgroth/openlp/24bugfix-backport4 into lp:openlp/2.4

Proposed by Tomas Groth
Status: Merged
Approved by: Raoul Snyman
Approved revision: 2644
Merged at revision: 2644
Proposed branch: lp:~tomasgroth/openlp/24bugfix-backport4
Merge into: lp:openlp/2.4
Diff against target: 180 lines (+39/-29)
2 files modified
openlp/plugins/bibles/lib/http.py (+28/-22)
tests/interfaces/openlp_plugins/bibles/test_lib_http.py (+11/-7)
To merge this branch: bzr merge lp:~tomasgroth/openlp/24bugfix-backport4
Reviewer Review Type Date Requested Status
Raoul Snyman Approve
Review via email: mp+301319@code.launchpad.net

Description of the change

Use the standard BibleGateway site instead of the legacy site. Fixes bug 1562384.
Update the Crosswalk web page parser to match the site's new layout. Fixes bug 1599999.

To post a comment you must log in.
Revision history for this message
Tomas Groth (tomasgroth) wrote :
Revision history for this message
Raoul Snyman (raoul-snyman) :
review: Approve

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'openlp/plugins/bibles/lib/http.py'
2--- openlp/plugins/bibles/lib/http.py 2016-05-09 19:41:23 +0000
3+++ openlp/plugins/bibles/lib/http.py 2016-07-27 19:32:31 +0000
4@@ -248,7 +248,7 @@
5 url_book_name = urllib.parse.quote(book_name.encode("utf-8"))
6 url_params = 'search=%s+%s&version=%s' % (url_book_name, chapter, version)
7 soup = get_soup_for_bible_ref(
8- 'http://legacy.biblegateway.com/passage/?%s' % url_params,
9+ 'http://biblegateway.com/passage/?%s' % url_params,
10 pre_parse_regex=r'<meta name.*?/>', pre_parse_substitute='')
11 if not soup:
12 return None
13@@ -277,7 +277,7 @@
14 """
15 log.debug('BGExtract.get_books_from_http("%s")', version)
16 url_params = urllib.parse.urlencode({'action': 'getVersionInfo', 'vid': '%s' % version})
17- reference_url = 'http://legacy.biblegateway.com/versions/?%s#books' % url_params
18+ reference_url = 'http://biblegateway.com/versions/?%s#books' % url_params
19 page = get_web_page(reference_url)
20 if not page:
21 send_error_message('download')
22@@ -308,7 +308,7 @@
23 for book in content:
24 book = book.find('td')
25 if book:
26- books.append(book.contents[0])
27+ books.append(book.contents[1])
28 return books
29
30 def get_bibles_from_http(self):
31@@ -318,11 +318,11 @@
32 returns a list in the form [(biblename, biblekey, language_code)]
33 """
34 log.debug('BGExtract.get_bibles_from_http')
35- bible_url = 'https://legacy.biblegateway.com/versions/'
36+ bible_url = 'https://biblegateway.com/versions/'
37 soup = get_soup_for_bible_ref(bible_url)
38 if not soup:
39 return None
40- bible_select = soup.find('select', {'class': 'translation-dropdown'})
41+ bible_select = soup.find('select', {'class': 'search-translation-select'})
42 if not bible_select:
43 log.debug('No select tags found - did site change?')
44 return None
45@@ -520,28 +520,26 @@
46 returns a list in the form [(biblename, biblekey, language_code)]
47 """
48 log.debug('CWExtract.get_bibles_from_http')
49- bible_url = 'http://www.biblestudytools.com/'
50+ bible_url = 'http://www.biblestudytools.com/bible-versions/'
51 soup = get_soup_for_bible_ref(bible_url)
52 if not soup:
53 return None
54- bible_select = soup.find('select')
55- if not bible_select:
56- log.debug('No select tags found - did site change?')
57- return None
58- option_tags = bible_select.find_all('option', {'class': 'log-translation'})
59- if not option_tags:
60- log.debug('No option tags found - did site change?')
61+ h4_tags = soup.find_all('h4', {'class': 'small-header'})
62+ if not h4_tags:
63+ log.debug('No h4 tags found - did site change?')
64 return None
65 bibles = []
66- for ot in option_tags:
67- tag_text = ot.get_text().strip()
68- try:
69- tag_value = ot['value']
70- except KeyError:
71- log.exception('No value attribute found - did site change?')
72+ for h4t in h4_tags:
73+ short_name = None
74+ if h4t.span:
75+ short_name = h4t.span.get_text().strip().lower()
76+ else:
77+ log.error('No span tag found - did site change?')
78 return None
79- if not tag_value:
80+ if not short_name:
81 continue
82+ h4t.span.extract()
83+ tag_text = h4t.get_text().strip()
84 # The names of non-english bibles has their language in parentheses at the end
85 if tag_text.endswith(')'):
86 language = tag_text[tag_text.rfind('(') + 1:-1]
87@@ -549,12 +547,20 @@
88 language_code = CROSSWALK_LANGUAGES[language]
89 else:
90 language_code = ''
91- # ... except for the latin vulgate
92+ # ... except for those that don't...
93 elif 'latin' in tag_text.lower():
94 language_code = 'la'
95+ elif 'la biblia' in tag_text.lower() or 'nueva' in tag_text.lower():
96+ language_code = 'es'
97+ elif 'chinese' in tag_text.lower():
98+ language_code = 'zh'
99+ elif 'greek' in tag_text.lower():
100+ language_code = 'el'
101+ elif 'nova' in tag_text.lower():
102+ language_code = 'pt'
103 else:
104 language_code = 'en'
105- bibles.append((tag_text, tag_value, language_code))
106+ bibles.append((tag_text, short_name, language_code))
107 return bibles
108
109
110
111=== modified file 'tests/interfaces/openlp_plugins/bibles/test_lib_http.py'
112--- tests/interfaces/openlp_plugins/bibles/test_lib_http.py 2016-01-07 21:38:13 +0000
113+++ tests/interfaces/openlp_plugins/bibles/test_lib_http.py 2016-07-27 19:32:31 +0000
114@@ -50,7 +50,8 @@
115 books = handler.get_books_from_http('NIV')
116
117 # THEN: We should get back a valid service item
118- assert len(books) == 66, 'The bible should not have had any books added or removed'
119+ self.assertEqual(len(books), 66, 'The bible should not have had any books added or removed')
120+ self.assertEqual(books[0], 'Genesis', 'The first bible book should be Genesis')
121
122 def bible_gateway_extract_books_support_redirect_test(self):
123 """
124@@ -63,7 +64,7 @@
125 books = handler.get_books_from_http('DN1933')
126
127 # THEN: We should get back a valid service item
128- assert len(books) == 66, 'This bible should have 66 books'
129+ self.assertEqual(len(books), 66, 'This bible should have 66 books')
130
131 def bible_gateway_extract_verse_test(self):
132 """
133@@ -76,7 +77,8 @@
134 results = handler.get_bible_chapter('NIV', 'John', 3)
135
136 # THEN: We should get back a valid service item
137- assert len(results.verse_list) == 36, 'The book of John should not have had any verses added or removed'
138+ self.assertEqual(len(results.verse_list), 36,
139+ 'The book of John should not have had any verses added or removed')
140
141 def bible_gateway_extract_verse_nkjv_test(self):
142 """
143@@ -89,7 +91,8 @@
144 results = handler.get_bible_chapter('NKJV', 'John', 3)
145
146 # THEN: We should get back a valid service item
147- assert len(results.verse_list) == 36, 'The book of John should not have had any verses added or removed'
148+ self.assertEqual(len(results.verse_list), 36,
149+ 'The book of John should not have had any verses added or removed')
150
151 def crosswalk_extract_books_test(self):
152 """
153@@ -102,7 +105,7 @@
154 books = handler.get_books_from_http('niv')
155
156 # THEN: We should get back a valid service item
157- assert len(books) == 66, 'The bible should not have had any books added or removed'
158+ self.assertEqual(len(books), 66, 'The bible should not have had any books added or removed')
159
160 def crosswalk_extract_verse_test(self):
161 """
162@@ -115,7 +118,8 @@
163 results = handler.get_bible_chapter('niv', 'john', 3)
164
165 # THEN: We should get back a valid service item
166- assert len(results.verse_list) == 36, 'The book of John should not have had any verses added or removed'
167+ self.assertEqual(len(results.verse_list), 36,
168+ 'The book of John should not have had any verses added or removed')
169
170 def bibleserver_get_bibles_test(self):
171 """
172@@ -144,7 +148,7 @@
173
174 # THEN: The list should not be None, and some known bibles should be there
175 self.assertIsNotNone(bibles)
176- self.assertIn(('Holman Christian Standard Bible', 'HCSB', 'en'), bibles)
177+ self.assertIn(('Holman Christian Standard Bible (HCSB)', 'HCSB', 'en'), bibles)
178
179 def crosswalk_get_bibles_test(self):
180 """

Subscribers

People subscribed via source and target branches

to all changes: