Status: | Merged |
---|---|
Merged at revision: | 14479 |
Proposed branch: | lp:~user-none/calibre/dev |
Merge into: | lp:calibre |
Diff against target: |
371 lines (+68/-50) 7 files modified
src/calibre/gui2/store/stores/amazon_de_plugin.py (+13/-9) src/calibre/gui2/store/stores/amazon_es_plugin.py (+13/-9) src/calibre/gui2/store/stores/amazon_fr_plugin.py (+13/-10) src/calibre/gui2/store/stores/amazon_it_plugin.py (+13/-10) src/calibre/gui2/store/stores/amazon_plugin.py (+1/-1) src/calibre/gui2/store/stores/amazon_uk_plugin.py (+13/-9) src/calibre/gui2/store/stores/foyles_uk_plugin.py (+2/-2) |
To merge this branch: | bzr merge lp:~user-none/calibre/dev |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Kovid Goyal | Pending | ||
Review via email: mp+150939@code.launchpad.net |
Commit message
Description of the change
To post a comment you must log in.
Revision history for this message
Kovid Goyal (kovid) wrote : | # |
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === modified file 'src/calibre/gui2/store/stores/amazon_de_plugin.py' |
2 | --- src/calibre/gui2/store/stores/amazon_de_plugin.py 2013-01-15 09:58:07 +0000 |
3 | +++ src/calibre/gui2/store/stores/amazon_de_plugin.py 2013-02-27 23:47:23 +0000 |
4 | @@ -7,6 +7,7 @@ |
5 | __copyright__ = '2011, John Schember <john@nachtimwald.com>' |
6 | __docformat__ = 'restructuredtext en' |
7 | |
8 | +import re |
9 | from contextlib import closing |
10 | from lxml import html |
11 | |
12 | @@ -49,7 +50,7 @@ |
13 | asin_xpath = '@name' |
14 | cover_xpath = './/img[@class="productImage"]/@src' |
15 | title_xpath = './/h3[@class="newaps"]/a//text()' |
16 | - author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]/text()' |
17 | + author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()' |
18 | price_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and contains(@class, "bld")]/text()' |
19 | |
20 | for data in doc.xpath(data_xpath): |
21 | @@ -57,7 +58,7 @@ |
22 | break |
23 | |
24 | # Even though we are searching digital-text only Amazon will still |
25 | - # put in results for non Kindle books (author pages). Se we need |
26 | + # put in results for non Kindle books (authors pages). Se we need |
27 | # to explicitly check if the item is a Kindle book and ignore it |
28 | # if it isn't. |
29 | format_ = ''.join(data.xpath(format_xpath)) |
30 | @@ -75,12 +76,13 @@ |
31 | cover_url = ''.join(data.xpath(cover_xpath)) |
32 | |
33 | title = ''.join(data.xpath(title_xpath)) |
34 | - author = ''.join(data.xpath(author_xpath)) |
35 | - try: |
36 | - if self.author_article: |
37 | - author = author.split(self.author_article, 1)[1].split(" (")[0] |
38 | - except: |
39 | - pass |
40 | + |
41 | + authors = ''.join(data.xpath(author_xpath)) |
42 | + authors = re.sub('^' + self.author_article, '', authors) |
43 | + authors = re.sub(self.and_word, ' & ', authors) |
44 | + mo = re.match(r'(.*)(\(\d.*)$', authors) |
45 | + if mo: |
46 | + authors = mo.group(1).strip() |
47 | |
48 | price = ''.join(data.xpath(price_xpath)) |
49 | |
50 | @@ -89,7 +91,7 @@ |
51 | s = SearchResult() |
52 | s.cover_url = cover_url.strip() |
53 | s.title = title.strip() |
54 | - s.author = author.strip() |
55 | + s.author = authors.strip() |
56 | s.price = price.strip() |
57 | s.detail_item = asin.strip() |
58 | s.drm = SearchResult.DRM_UNKNOWN |
59 | @@ -115,3 +117,5 @@ |
60 | search_url = 'http://www.amazon.de/s/?url=search-alias%3Ddigital-text&field-keywords=' |
61 | |
62 | author_article = 'von ' |
63 | + |
64 | + and_word = ' und ' |
65 | \ No newline at end of file |
66 | |
67 | === modified file 'src/calibre/gui2/store/stores/amazon_es_plugin.py' |
68 | --- src/calibre/gui2/store/stores/amazon_es_plugin.py 2013-01-15 09:58:07 +0000 |
69 | +++ src/calibre/gui2/store/stores/amazon_es_plugin.py 2013-02-27 23:47:23 +0000 |
70 | @@ -7,6 +7,7 @@ |
71 | __copyright__ = '2011, John Schember <john@nachtimwald.com>' |
72 | __docformat__ = 'restructuredtext en' |
73 | |
74 | +import re |
75 | from contextlib import closing |
76 | from lxml import html |
77 | |
78 | @@ -48,7 +49,7 @@ |
79 | asin_xpath = '@name' |
80 | cover_xpath = './/img[@class="productImage"]/@src' |
81 | title_xpath = './/h3[@class="newaps"]/a//text()' |
82 | - author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]/text()' |
83 | + author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()' |
84 | price_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and contains(@class, "bld")]/text()' |
85 | |
86 | for data in doc.xpath(data_xpath): |
87 | @@ -56,7 +57,7 @@ |
88 | break |
89 | |
90 | # Even though we are searching digital-text only Amazon will still |
91 | - # put in results for non Kindle books (author pages). Se we need |
92 | + # put in results for non Kindle books (authors pages). Se we need |
93 | # to explicitly check if the item is a Kindle book and ignore it |
94 | # if it isn't. |
95 | format_ = ''.join(data.xpath(format_xpath)) |
96 | @@ -74,12 +75,13 @@ |
97 | cover_url = ''.join(data.xpath(cover_xpath)) |
98 | |
99 | title = ''.join(data.xpath(title_xpath)) |
100 | - author = ''.join(data.xpath(author_xpath)) |
101 | - try: |
102 | - if self.author_article: |
103 | - author = author.split(self.author_article, 1)[1].split(" (")[0] |
104 | - except: |
105 | - pass |
106 | + |
107 | + authors = ''.join(data.xpath(author_xpath)) |
108 | + authors = re.sub('^' + self.author_article, '', authors) |
109 | + authors = re.sub(self.and_word, ' & ', authors) |
110 | + mo = re.match(r'(.*)(\(\d.*)$', authors) |
111 | + if mo: |
112 | + authors = mo.group(1).strip() |
113 | |
114 | price = ''.join(data.xpath(price_xpath)) |
115 | |
116 | @@ -88,7 +90,7 @@ |
117 | s = SearchResult() |
118 | s.cover_url = cover_url.strip() |
119 | s.title = title.strip() |
120 | - s.author = author.strip() |
121 | + s.author = authors.strip() |
122 | s.price = price.strip() |
123 | s.detail_item = asin.strip() |
124 | s.drm = SearchResult.DRM_UNKNOWN |
125 | @@ -113,3 +115,5 @@ |
126 | search_url = 'http://www.amazon.es/s/?url=search-alias%3Ddigital-text&field-keywords=' |
127 | |
128 | author_article = 'de ' |
129 | + |
130 | + and_word = ' y ' |
131 | \ No newline at end of file |
132 | |
133 | === modified file 'src/calibre/gui2/store/stores/amazon_fr_plugin.py' |
134 | --- src/calibre/gui2/store/stores/amazon_fr_plugin.py 2013-01-15 09:58:07 +0000 |
135 | +++ src/calibre/gui2/store/stores/amazon_fr_plugin.py 2013-02-27 23:47:23 +0000 |
136 | @@ -7,7 +7,7 @@ |
137 | __copyright__ = '2011, John Schember <john@nachtimwald.com>' |
138 | __docformat__ = 'restructuredtext en' |
139 | |
140 | - |
141 | +import re |
142 | from contextlib import closing |
143 | from lxml import html |
144 | |
145 | @@ -50,7 +50,7 @@ |
146 | asin_xpath = '@name' |
147 | cover_xpath = './/img[@class="productImage"]/@src' |
148 | title_xpath = './/h3[@class="newaps"]/a//text()' |
149 | - author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]/text()' |
150 | + author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()' |
151 | price_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and contains(@class, "bld")]/text()' |
152 | |
153 | for data in doc.xpath(data_xpath): |
154 | @@ -58,7 +58,7 @@ |
155 | break |
156 | |
157 | # Even though we are searching digital-text only Amazon will still |
158 | - # put in results for non Kindle books (author pages). Se we need |
159 | + # put in results for non Kindle books (authors pages). Se we need |
160 | # to explicitly check if the item is a Kindle book and ignore it |
161 | # if it isn't. |
162 | format_ = ''.join(data.xpath(format_xpath)) |
163 | @@ -76,12 +76,13 @@ |
164 | cover_url = ''.join(data.xpath(cover_xpath)) |
165 | |
166 | title = ''.join(data.xpath(title_xpath)) |
167 | - author = ''.join(data.xpath(author_xpath)) |
168 | - try: |
169 | - if self.author_article: |
170 | - author = author.split(self.author_article, 1)[1].split(" (")[0] |
171 | - except: |
172 | - pass |
173 | + |
174 | + authors = ''.join(data.xpath(author_xpath)) |
175 | + authors = re.sub('^' + self.author_article, '', authors) |
176 | + authors = re.sub(self.and_word, ' & ', authors) |
177 | + mo = re.match(r'(.*)(\(\d.*)$', authors) |
178 | + if mo: |
179 | + authors = mo.group(1).strip() |
180 | |
181 | price = ''.join(data.xpath(price_xpath)) |
182 | |
183 | @@ -90,7 +91,7 @@ |
184 | s = SearchResult() |
185 | s.cover_url = cover_url.strip() |
186 | s.title = title.strip() |
187 | - s.author = author.strip() |
188 | + s.author = authors.strip() |
189 | s.price = price.strip() |
190 | s.detail_item = asin.strip() |
191 | s.drm = SearchResult.DRM_UNKNOWN |
192 | @@ -112,3 +113,5 @@ |
193 | search_url = 'http://www.amazon.fr/s/?url=search-alias%3Ddigital-text&field-keywords=' |
194 | |
195 | author_article = 'de ' |
196 | + |
197 | + and_word = ' et ' |
198 | |
199 | === modified file 'src/calibre/gui2/store/stores/amazon_it_plugin.py' |
200 | --- src/calibre/gui2/store/stores/amazon_it_plugin.py 2013-01-15 09:58:07 +0000 |
201 | +++ src/calibre/gui2/store/stores/amazon_it_plugin.py 2013-02-27 23:47:23 +0000 |
202 | @@ -7,6 +7,7 @@ |
203 | __copyright__ = '2011, John Schember <john@nachtimwald.com>' |
204 | __docformat__ = 'restructuredtext en' |
205 | |
206 | +import re |
207 | from contextlib import closing |
208 | from lxml import html |
209 | |
210 | @@ -48,7 +49,7 @@ |
211 | asin_xpath = '@name' |
212 | cover_xpath = './/img[@class="productImage"]/@src' |
213 | title_xpath = './/h3[@class="newaps"]/a//text()' |
214 | - author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]/text()' |
215 | + author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()' |
216 | price_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and contains(@class, "bld")]/text()' |
217 | |
218 | for data in doc.xpath(data_xpath): |
219 | @@ -56,7 +57,7 @@ |
220 | break |
221 | |
222 | # Even though we are searching digital-text only Amazon will still |
223 | - # put in results for non Kindle books (author pages). Se we need |
224 | + # put in results for non Kindle books (authors pages). Se we need |
225 | # to explicitly check if the item is a Kindle book and ignore it |
226 | # if it isn't. |
227 | format_ = ''.join(data.xpath(format_xpath)) |
228 | @@ -74,12 +75,13 @@ |
229 | cover_url = ''.join(data.xpath(cover_xpath)) |
230 | |
231 | title = ''.join(data.xpath(title_xpath)) |
232 | - author = ''.join(data.xpath(author_xpath)) |
233 | - try: |
234 | - if self.author_article: |
235 | - author = author.split(self.author_article, 1)[1].split(" (")[0] |
236 | - except: |
237 | - pass |
238 | + |
239 | + authors = ''.join(data.xpath(author_xpath)) |
240 | + authors = re.sub('^' + self.author_article, '', authors) |
241 | + authors = re.sub(self.and_word, ' & ', authors) |
242 | + mo = re.match(r'(.*)(\(\d.*)$', authors) |
243 | + if mo: |
244 | + authors = mo.group(1).strip() |
245 | |
246 | price = ''.join(data.xpath(price_xpath)) |
247 | |
248 | @@ -88,7 +90,7 @@ |
249 | s = SearchResult() |
250 | s.cover_url = cover_url.strip() |
251 | s.title = title.strip() |
252 | - s.author = author.strip() |
253 | + s.author = authors.strip() |
254 | s.price = price.strip() |
255 | s.detail_item = asin.strip() |
256 | s.drm = SearchResult.DRM_UNKNOWN |
257 | @@ -99,7 +101,6 @@ |
258 | def get_details(self, search_result, timeout): |
259 | pass |
260 | |
261 | - |
262 | class AmazonITKindleStore(AmazonEUBase): |
263 | ''' |
264 | For comments on the implementation, please see amazon_plugin.py |
265 | @@ -114,3 +115,5 @@ |
266 | search_url = 'http://www.amazon.it/s/?url=search-alias%3Ddigital-text&field-keywords=' |
267 | |
268 | author_article = 'di ' |
269 | + |
270 | + and_word = ' e ' |
271 | \ No newline at end of file |
272 | |
273 | === modified file 'src/calibre/gui2/store/stores/amazon_plugin.py' |
274 | --- src/calibre/gui2/store/stores/amazon_plugin.py 2013-01-17 11:17:06 +0000 |
275 | +++ src/calibre/gui2/store/stores/amazon_plugin.py 2013-02-27 23:47:23 +0000 |
276 | @@ -133,7 +133,7 @@ |
277 | asin_xpath = '@name' |
278 | cover_xpath = './/img[@class="productImage"]/@src' |
279 | title_xpath = './/h3[@class="newaps"]/a//text()' |
280 | - author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]/text()' |
281 | + author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()' |
282 | price_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and contains(@class, "bld")]/text()' |
283 | |
284 | for data in doc.xpath(data_xpath): |
285 | |
286 | === modified file 'src/calibre/gui2/store/stores/amazon_uk_plugin.py' |
287 | --- src/calibre/gui2/store/stores/amazon_uk_plugin.py 2013-01-15 09:58:07 +0000 |
288 | +++ src/calibre/gui2/store/stores/amazon_uk_plugin.py 2013-02-27 23:47:23 +0000 |
289 | @@ -7,6 +7,7 @@ |
290 | __copyright__ = '2011, John Schember <john@nachtimwald.com>' |
291 | __docformat__ = 'restructuredtext en' |
292 | |
293 | +import re |
294 | from contextlib import closing |
295 | from lxml import html |
296 | |
297 | @@ -48,7 +49,7 @@ |
298 | asin_xpath = '@name' |
299 | cover_xpath = './/img[@class="productImage"]/@src' |
300 | title_xpath = './/h3[@class="newaps"]/a//text()' |
301 | - author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]/text()' |
302 | + author_xpath = './/h3[@class="newaps"]//span[contains(@class, "reg")]//text()' |
303 | price_xpath = './/ul[contains(@class, "rsltL")]//span[contains(@class, "lrg") and contains(@class, "bld")]/text()' |
304 | |
305 | for data in doc.xpath(data_xpath): |
306 | @@ -56,7 +57,7 @@ |
307 | break |
308 | |
309 | # Even though we are searching digital-text only Amazon will still |
310 | - # put in results for non Kindle books (author pages). Se we need |
311 | + # put in results for non Kindle books (authors pages). Se we need |
312 | # to explicitly check if the item is a Kindle book and ignore it |
313 | # if it isn't. |
314 | format_ = ''.join(data.xpath(format_xpath)) |
315 | @@ -74,12 +75,13 @@ |
316 | cover_url = ''.join(data.xpath(cover_xpath)) |
317 | |
318 | title = ''.join(data.xpath(title_xpath)) |
319 | - author = ''.join(data.xpath(author_xpath)) |
320 | - try: |
321 | - if self.author_article: |
322 | - author = author.split(self.author_article, 1)[1].split(" (")[0] |
323 | - except: |
324 | - pass |
325 | + |
326 | + authors = ''.join(data.xpath(author_xpath)) |
327 | + authors = re.sub('^' + self.author_article, '', authors) |
328 | + authors = re.sub(self.and_word, ' & ', authors) |
329 | + mo = re.match(r'(.*)(\(\d.*)$', authors) |
330 | + if mo: |
331 | + authors = mo.group(1).strip() |
332 | |
333 | price = ''.join(data.xpath(price_xpath)) |
334 | |
335 | @@ -88,7 +90,7 @@ |
336 | s = SearchResult() |
337 | s.cover_url = cover_url.strip() |
338 | s.title = title.strip() |
339 | - s.author = author.strip() |
340 | + s.author = authors.strip() |
341 | s.price = price.strip() |
342 | s.detail_item = asin.strip() |
343 | s.drm = SearchResult.DRM_UNKNOWN |
344 | @@ -112,3 +114,5 @@ |
345 | |
346 | author_article = 'by ' |
347 | |
348 | + and_word = ' and ' |
349 | + |
350 | |
351 | === modified file 'src/calibre/gui2/store/stores/foyles_uk_plugin.py' |
352 | --- src/calibre/gui2/store/stores/foyles_uk_plugin.py 2013-01-14 06:20:23 +0000 |
353 | +++ src/calibre/gui2/store/stores/foyles_uk_plugin.py 2013-02-27 23:47:23 +0000 |
354 | @@ -41,7 +41,7 @@ |
355 | d.exec_() |
356 | |
357 | def search(self, query, max_results=10, timeout=60): |
358 | - url = 'http://ebooks.foyles.co.uk/search_for-' + urllib2.quote(query) |
359 | + url = 'http://ebooks.foyles.co.uk/catalog/search/?query=' + urllib2.quote(query) |
360 | |
361 | br = browser() |
362 | |
363 | @@ -58,7 +58,7 @@ |
364 | cover_url = ''.join(data.xpath('.//p[@class="doc-cover"]/a/img/@src')) |
365 | title = ''.join(data.xpath('.//span[@class="title"]/a/text()')) |
366 | author = ', '.join(data.xpath('.//span[@class="author"]/span[@class="author"]/text()')) |
367 | - price = ''.join(data.xpath('.//span[@class="price"]/text()')) |
368 | + price = ''.join(data.xpath('.//span[@itemprop="price"]/text()')) |
369 | format_ = ''.join(data.xpath('.//p[@class="doc-meta-format"]/span[last()]/text()')) |
370 | format_, ign, drm = format_.partition(' ') |
371 | drm = SearchResult.DRM_LOCKED if 'DRM' in drm else SearchResult.DRM_UNLOCKED |
I have merged, but I noticed you haven't updated the plugin_version, so the updates won't go out dynamically. Is that intentional?