Merge lp:~realender/calibre/calibre into lp:~user-none/calibre/store
- calibre
- Merge into store
Proposed by
Alex Stanev
Status: | Merged |
---|---|
Merged at revision: | 8446 |
Proposed branch: | lp:~realender/calibre/calibre |
Merge into: | lp:~user-none/calibre/store |
Diff against target: |
1189 lines (+803/-91) 14 files modified
recipes/idg_se.recipe (+33/-0) recipes/united_daily.recipe (+1/-1) recipes/utrinski.recipe (+71/-0) src/calibre/customize/builtins.py (+11/-0) src/calibre/devices/android/driver.py (+5/-3) src/calibre/devices/eb600/driver.py (+3/-3) src/calibre/ebooks/mobi/debug.py (+293/-8) src/calibre/ebooks/mobi/tbs_periodicals.rst (+189/-0) src/calibre/ebooks/mobi/utils.py (+5/-3) src/calibre/gui2/store/stores/chitanka_plugin.py (+16/-46) src/calibre/gui2/store/stores/eknigi_plugin.py (+88/-0) src/calibre/translations/msgfmt.py (+83/-24) src/calibre/utils/localization.py (+3/-3) src/calibre/web/feeds/recipes/model.py (+2/-0) |
To merge this branch: | bzr merge lp:~realender/calibre/calibre |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
John Schember | Pending | ||
Review via email: mp+68588@code.launchpad.net |
Commit message
Description of the change
To post a comment you must log in.
lp:~realender/calibre/calibre
updated
- 9904. By Alex Stanev
-
Remove affiliate id for now
- 9905. By Alex Stanev
-
Use Kovid's affiliate id 30% of the time
- 9906. By Alex Stanev
-
sync to trunc
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === added file 'recipes/idg_se.recipe' |
2 | --- recipes/idg_se.recipe 1970-01-01 00:00:00 +0000 |
3 | +++ recipes/idg_se.recipe 2011-07-22 18:03:36 +0000 |
4 | @@ -0,0 +1,33 @@ |
5 | +__license__ = 'GPLv3' |
6 | + |
7 | +from calibre.web.feeds.news import BasicNewsRecipe |
8 | + |
9 | +class IDGse(BasicNewsRecipe): |
10 | + title = 'IDG' |
11 | + description = 'IDG.se' |
12 | + language = 'se' |
13 | + __author__ = 'zapt0' |
14 | + oldest_article = 1 |
15 | + max_articles_per_feed = 40 |
16 | + no_stylesheets = True |
17 | + encoding = 'ISO-8859-1' |
18 | + remove_javascript = True |
19 | + |
20 | + feeds = [(u'Senaste nytt',u'http://feeds.idg.se/idg/vzzs')] |
21 | + |
22 | + def print_version(self,url): |
23 | + return url + '?articleRenderMode=print&m=print' |
24 | + |
25 | + def get_cover_url(this): |
26 | + return 'http://idgmedia.idg.se/polopoly_fs/2.3275!images/idgmedia_logo_75.jpg' |
27 | + |
28 | + keep_only_tags = [ |
29 | + dict(name='h1'), |
30 | + dict(name='div', attrs={'class':['divColumn1Article']}), |
31 | + ] |
32 | + #remove ads |
33 | + remove_tags = [ |
34 | + dict(name='div', attrs={'id':['preamble_ad']}), |
35 | + dict(name='ul', attrs={'class':['share']}) |
36 | + ] |
37 | + |
38 | |
39 | === modified file 'recipes/united_daily.recipe' |
40 | --- recipes/united_daily.recipe 2011-05-17 15:30:51 +0000 |
41 | +++ recipes/united_daily.recipe 2011-07-22 18:03:36 +0000 |
42 | @@ -64,7 +64,7 @@ |
43 | |
44 | __author__ = 'Eddie Lau' |
45 | __version__ = '1.1' |
46 | - language = 'zh-TW' |
47 | + language = 'zh_TW' |
48 | publisher = 'United Daily News Group' |
49 | description = 'United Daily (Taiwan)' |
50 | category = 'News, Chinese, Taiwan' |
51 | |
52 | === added file 'recipes/utrinski.recipe' |
53 | --- recipes/utrinski.recipe 1970-01-01 00:00:00 +0000 |
54 | +++ recipes/utrinski.recipe 2011-07-22 18:03:36 +0000 |
55 | @@ -0,0 +1,71 @@ |
56 | +#!/usr/bin/env python |
57 | + |
58 | +__license__ = 'GPL v3' |
59 | +__copyright__ = '2011, Darko Spasovski <darko.spasovski at gmail.com>' |
60 | +''' |
61 | +utrinski.com.mk |
62 | +''' |
63 | + |
64 | +import re |
65 | +import datetime |
66 | +from calibre.web.feeds.news import BasicNewsRecipe |
67 | + |
68 | +class UtrinskiVesnik(BasicNewsRecipe): |
69 | + |
70 | + __author__ = 'Darko Spasovski' |
71 | + INDEX = 'http://www.utrinski.com.mk/' |
72 | + title = 'Utrinski Vesnik' |
73 | + description = 'Daily Macedonian newspaper' |
74 | + masthead_url = 'http://www.utrinski.com.mk/images/LogoTop.jpg' |
75 | + language = 'mk' |
76 | + remove_javascript = True |
77 | + publication_type = 'newspaper' |
78 | + category = 'news, Macedonia' |
79 | + oldest_article = 2 |
80 | + max_articles_per_feed = 100 |
81 | + no_stylesheets = True |
82 | + use_embedded_content = False |
83 | + preprocess_regexps = [(re.compile(i[0], re.IGNORECASE | re.DOTALL), i[1]) for i in |
84 | + [ |
85 | + ## Remove anything before the start of the article. |
86 | + (r'<body.*?Article start-->', lambda match: '<body>'), |
87 | + |
88 | + ## Remove anything after the end of the article. |
89 | + (r'<!--Article end.*?</body>', lambda match : '</body>'), |
90 | + ] |
91 | + ] |
92 | + extra_css = """ |
93 | + body{font-family: Arial,Helvetica,sans-serif} |
94 | + .WB_UTRINSKIVESNIK_Naslov{FONT-WEIGHT: bold; FONT-SIZE: 18px; FONT-FAMILY: Arial, Verdana, Tahoma; TEXT-DECORATION: none} |
95 | + """ |
96 | + |
97 | + conversion_options = { |
98 | + 'comment' : description, |
99 | + 'tags' : category, |
100 | + 'language' : language, |
101 | + 'linearize_tables' : True |
102 | + } |
103 | + |
104 | + def parse_index(self): |
105 | + soup = self.index_to_soup(self.INDEX) |
106 | + feeds = [] |
107 | + for section in soup.findAll('a', attrs={'class':'WB_UTRINSKIVESNIK_TOCTitleBig'}): |
108 | + sectionTitle = section.contents[0].string |
109 | + tocItemTable = section.findAllPrevious('table')[1] |
110 | + if tocItemTable is None: continue |
111 | + articles = [] |
112 | + while True: |
113 | + tocItemTable = tocItemTable.nextSibling |
114 | + if tocItemTable is None: break |
115 | + article = tocItemTable.findAll('a', attrs={'class': 'WB_UTRINSKIVESNIK_TocItem'}) |
116 | + if len(article)==0: break |
117 | + title = self.tag_to_string(article[0], use_alt=True).strip() |
118 | + articles.append({'title': title, 'url':'http://www.utrinski.com.mk/' + article[0]['href'], 'description':'', 'date':''}) |
119 | + if articles: |
120 | + feeds.append((sectionTitle, articles)) |
121 | + return feeds |
122 | + |
123 | + |
124 | + def get_cover_url(self): |
125 | + datum = datetime.datetime.today().strftime('%d_%m_%Y') |
126 | + return 'http://www.utrinski.com.mk/WBStorage/Files/' + datum + '.jpg' |
127 | |
128 | === modified file 'src/calibre/customize/builtins.py' |
129 | --- src/calibre/customize/builtins.py 2011-07-19 22:48:25 +0000 |
130 | +++ src/calibre/customize/builtins.py 2011-07-22 18:03:36 +0000 |
131 | @@ -1258,6 +1258,16 @@ |
132 | formats = ['EPUB', 'PDF'] |
133 | affiliate = True |
134 | |
135 | +class StoreEKnigiStore(StoreBase): |
136 | + name = u'еКниги' |
137 | + author = 'Alex Stanev' |
138 | + description = u'Онлайн книжарница за електронни книги и аудио риалити романи' |
139 | + actual_plugin = 'calibre.gui2.store.stores.eknigi_plugin:eKnigiStore' |
140 | + |
141 | + headquarters = 'BG' |
142 | + formats = ['EPUB', 'PDF', 'HTML'] |
143 | + #affiliate = True |
144 | + |
145 | class StoreEpubBudStore(StoreBase): |
146 | name = 'ePub Bud' |
147 | description = 'Well, it\'s pretty much just "YouTube for Children\'s eBooks. A not-for-profit organization devoted to brining self published childrens books to the world.' |
148 | @@ -1483,6 +1493,7 @@ |
149 | StoreEBookShoppeUKStore, |
150 | # StoreEPubBuyDEStore, |
151 | StoreEHarlequinStore, |
152 | + StoreEKnigiStore, |
153 | StoreEpubBudStore, |
154 | StoreFeedbooksStore, |
155 | StoreFoylesUKStore, |
156 | |
157 | === modified file 'src/calibre/devices/android/driver.py' |
158 | --- src/calibre/devices/android/driver.py 2011-07-16 16:01:32 +0000 |
159 | +++ src/calibre/devices/android/driver.py 2011-07-22 18:03:36 +0000 |
160 | @@ -47,10 +47,12 @@ |
161 | |
162 | |
163 | 0x18d1 : { |
164 | + 0x0001 : [0x0223], |
165 | 0x4e11 : [0x0100, 0x226, 0x227], |
166 | - 0x4e12: [0x0100, 0x226, 0x227], |
167 | - 0x4e21: [0x0100, 0x226, 0x227], |
168 | - 0xb058: [0x0222, 0x226, 0x227]}, |
169 | + 0x4e12 : [0x0100, 0x226, 0x227], |
170 | + 0x4e21 : [0x0100, 0x226, 0x227], |
171 | + 0xb058 : [0x0222, 0x226, 0x227] |
172 | + }, |
173 | |
174 | # Samsung |
175 | 0x04e8 : { 0x681d : [0x0222, 0x0223, 0x0224, 0x0400], |
176 | |
177 | === modified file 'src/calibre/devices/eb600/driver.py' |
178 | --- src/calibre/devices/eb600/driver.py 2011-06-07 00:26:05 +0000 |
179 | +++ src/calibre/devices/eb600/driver.py 2011-07-22 18:03:36 +0000 |
180 | @@ -35,9 +35,9 @@ |
181 | PRODUCT_ID = [0x1688] |
182 | BCD = [0x110] |
183 | |
184 | - VENDOR_NAME = ['NETRONIX', 'WOLDER'] |
185 | - WINDOWS_MAIN_MEM = ['EBOOK', 'MIBUK_GAMMA_6.2'] |
186 | - WINDOWS_CARD_A_MEM = 'EBOOK' |
187 | + VENDOR_NAME = ['NETRONIX', 'WOLDER', 'MD86371'] |
188 | + WINDOWS_MAIN_MEM = ['EBOOK', 'MIBUK_GAMMA_6.2', 'MD86371'] |
189 | + WINDOWS_CARD_A_MEM = ['EBOOK', 'MD86371'] |
190 | |
191 | OSX_MAIN_MEM = 'EB600 Internal Storage Media' |
192 | OSX_CARD_A_MEM = 'EB600 Card Storage Media' |
193 | |
194 | === modified file 'src/calibre/ebooks/mobi/debug.py' |
195 | --- src/calibre/ebooks/mobi/debug.py 2011-07-20 20:01:41 +0000 |
196 | +++ src/calibre/ebooks/mobi/debug.py 2011-07-22 18:03:36 +0000 |
197 | @@ -8,7 +8,7 @@ |
198 | __docformat__ = 'restructuredtext en' |
199 | |
200 | import struct, datetime, sys, os, shutil |
201 | -from collections import OrderedDict |
202 | +from collections import OrderedDict, defaultdict |
203 | from calibre.utils.date import utc_tz |
204 | from calibre.ebooks.mobi.langcodes import main_language, sub_language |
205 | from calibre.ebooks.mobi.utils import (decode_hex_number, decint, |
206 | @@ -530,21 +530,21 @@ |
207 | }, |
208 | |
209 | 'chapter_with_subchapters' : { |
210 | - 22 : ('First subchapter index', 'first_subchapter_index'), |
211 | - 23 : ('Last subchapter index', 'last_subchapter_index'), |
212 | + 22 : ('First subchapter index', 'first_child_index'), |
213 | + 23 : ('Last subchapter index', 'last_child_index'), |
214 | }, |
215 | |
216 | 'periodical' : { |
217 | 5 : ('Class offset in cncx', 'class_offset'), |
218 | - 22 : ('First section index', 'first_section_index'), |
219 | - 23 : ('Last section index', 'last_section_index'), |
220 | + 22 : ('First section index', 'first_child_index'), |
221 | + 23 : ('Last section index', 'last_child_index'), |
222 | }, |
223 | |
224 | 'section' : { |
225 | 5 : ('Class offset in cncx', 'class_offset'), |
226 | - 21 : ('Periodical index', 'periodical_index'), |
227 | - 22 : ('First article index', 'first_article_index'), |
228 | - 23 : ('Last article index', 'last_article_index'), |
229 | + 21 : ('Periodical index', 'parent_index'), |
230 | + 22 : ('First article index', 'first_child_index'), |
231 | + 23 : ('Last article index', 'last_child_index'), |
232 | }, |
233 | } |
234 | |
235 | @@ -625,11 +625,56 @@ |
236 | return tag.cncx_value |
237 | return '' |
238 | |
239 | + @property |
240 | + def offset(self): |
241 | + for tag in self.tags: |
242 | + if tag.attr == 'offset': |
243 | + return tag.value |
244 | + return 0 |
245 | + |
246 | + @property |
247 | + def size(self): |
248 | + for tag in self.tags: |
249 | + if tag.attr == 'size': |
250 | + return tag.value |
251 | + return 0 |
252 | + |
253 | + @property |
254 | + def depth(self): |
255 | + for tag in self.tags: |
256 | + if tag.attr == 'depth': |
257 | + return tag.value |
258 | + return 0 |
259 | + |
260 | + @property |
261 | + def parent_index(self): |
262 | + for tag in self.tags: |
263 | + if tag.attr == 'parent_index': |
264 | + return tag.value |
265 | + return -1 |
266 | + |
267 | + @property |
268 | + def first_child_index(self): |
269 | + for tag in self.tags: |
270 | + if tag.attr == 'first_child_index': |
271 | + return tag.value |
272 | + return -1 |
273 | + |
274 | + @property |
275 | + def last_child_index(self): |
276 | + for tag in self.tags: |
277 | + if tag.attr == 'last_child_index': |
278 | + return tag.value |
279 | + return -1 |
280 | + |
281 | def __str__(self): |
282 | ans = ['Index Entry(index=%s, entry_type=%s, length=%d)'%( |
283 | self.index, self.entry_type, len(self.tags))] |
284 | for tag in self.tags: |
285 | ans.append('\t'+str(tag)) |
286 | + if self.first_child_index != -1: |
287 | + ans.append('\tNumber of children: %d'%(self.last_child_index - |
288 | + self.first_child_index + 1)) |
289 | return '\n'.join(ans) |
290 | |
291 | # }}} |
292 | @@ -679,6 +724,15 @@ |
293 | entry_type = ord(indxt[off+consumed]) |
294 | self.indices.append(IndexEntry(index, entry_type, |
295 | indxt[off+consumed+1:next_off], cncx, index_header.tagx_entries)) |
296 | + index = self.indices[-1] |
297 | + |
298 | + def get_parent(self, index): |
299 | + if index.depth < 1: |
300 | + return None |
301 | + parent_depth = index.depth - 1 |
302 | + for p in self.indices: |
303 | + if p.depth != parent_depth: |
304 | + continue |
305 | |
306 | |
307 | def __str__(self): |
308 | @@ -793,6 +847,231 @@ |
309 | |
310 | # }}} |
311 | |
312 | +class TBSIndexing(object): # {{{ |
313 | + |
314 | + def __init__(self, text_records, indices, doc_type): |
315 | + self.record_indices = OrderedDict() |
316 | + self.doc_type = doc_type |
317 | + self.indices = indices |
318 | + pos = 0 |
319 | + for r in text_records: |
320 | + start = pos |
321 | + pos += len(r.raw) |
322 | + end = pos - 1 |
323 | + self.record_indices[r] = x = {'starts':[], 'ends':[], |
324 | + 'complete':[], 'geom': (start, end)} |
325 | + for entry in indices: |
326 | + istart, sz = entry.offset, entry.size |
327 | + iend = istart + sz - 1 |
328 | + has_start = istart >= start and istart <= end |
329 | + has_end = iend >= start and iend <= end |
330 | + rec = None |
331 | + if has_start and has_end: |
332 | + rec = 'complete' |
333 | + elif has_start and not has_end: |
334 | + rec = 'starts' |
335 | + elif not has_start and has_end: |
336 | + rec = 'ends' |
337 | + if rec: |
338 | + x[rec].append(entry) |
339 | + |
340 | + def get_index(self, idx): |
341 | + for i in self.indices: |
342 | + if i.index == idx: return i |
343 | + raise IndexError('Index %d not found'%idx) |
344 | + |
345 | + def __str__(self): |
346 | + ans = ['*'*20 + ' TBS Indexing (%d records) '%len(self.record_indices)+ '*'*20] |
347 | + for r, dat in self.record_indices.iteritems(): |
348 | + ans += self.dump_record(r, dat)[-1] |
349 | + return '\n'.join(ans) |
350 | + |
351 | + def dump(self, bdir): |
352 | + types = defaultdict(list) |
353 | + for r, dat in self.record_indices.iteritems(): |
354 | + tbs_type, strings = self.dump_record(r, dat) |
355 | + if tbs_type == 0: continue |
356 | + types[tbs_type] += strings |
357 | + for typ, strings in types.iteritems(): |
358 | + with open(os.path.join(bdir, 'tbs_type_%d.txt'%typ), 'wb') as f: |
359 | + f.write('\n'.join(strings)) |
360 | + |
361 | + def dump_record(self, r, dat): |
362 | + ans = [] |
363 | + ans.append('\nRecord #%d: Starts at: %d Ends at: %d'%(r.idx, |
364 | + dat['geom'][0], dat['geom'][1])) |
365 | + s, e, c = dat['starts'], dat['ends'], dat['complete'] |
366 | + ans.append(('\tContains: %d index entries ' |
367 | + '(%d ends, %d complete, %d starts)')%tuple(map(len, (s+e+c, e, |
368 | + c, s)))) |
369 | + byts = bytearray(r.trailing_data.get('indexing', b'')) |
370 | + sbyts = tuple(hex(b)[2:] for b in byts) |
371 | + ans.append('TBS bytes: %s'%(' '.join(sbyts))) |
372 | + for typ, entries in (('Ends', e), ('Complete', c), ('Starts', s)): |
373 | + if entries: |
374 | + ans.append('\t%s:'%typ) |
375 | + for x in entries: |
376 | + ans.append(('\t\tIndex Entry: %d (Parent index: %d, ' |
377 | + 'Depth: %d, Offset: %d, Size: %d) [%s]')%( |
378 | + x.index, x.parent_index, x.depth, x.offset, x.size, x.label)) |
379 | + def bin3(num): |
380 | + ans = bin(num)[2:] |
381 | + return '0'*(3-len(ans)) + ans |
382 | + |
383 | + tbs_type = 0 |
384 | + if len(byts): |
385 | + outer, consumed = decint(byts) |
386 | + byts = byts[consumed:] |
387 | + tbs_type = outer & 0b111 |
388 | + ans.append('TBS Type: %s (%d)'%(bin3(tbs_type), tbs_type)) |
389 | + ans.append('Outer Index entry: %d'%(outer >> 3)) |
390 | + arg1, consumed = decint(byts) |
391 | + byts = byts[consumed:] |
392 | + ans.append('Unknown (vwi: always 0?): %d'%arg1) |
393 | + if self.doc_type in (257, 259): # Hierarchical periodical |
394 | + byts, a = self.interpret_periodical(tbs_type, byts) |
395 | + ans += a |
396 | + if byts: |
397 | + sbyts = tuple(hex(b)[2:] for b in byts) |
398 | + ans.append('Remaining bytes: %s'%' '.join(sbyts)) |
399 | + |
400 | + ans.append('') |
401 | + return tbs_type, ans |
402 | + |
403 | + def interpret_periodical(self, tbs_type, byts): |
404 | + ans = [] |
405 | + |
406 | + def tbs_type_6(byts, psi=None, msg=None): # {{{ |
407 | + if psi is None: |
408 | + # Assume parent section is 1 |
409 | + psi = self.get_index(1) |
410 | + if msg is None: |
411 | + msg = ('Article index at start of record or first article' |
412 | + ' index, relative to parent section') |
413 | + if byts: |
414 | + # byts could be empty |
415 | + arg, consumed = decint(byts) |
416 | + byts = byts[consumed:] |
417 | + flags = (arg & 0b1111) |
418 | + ai = (arg >> 4) |
419 | + ans.append('%s (fvwi): %d [%d absolute]'%(msg, ai, |
420 | + ai+psi.index)) |
421 | + if flags == 1: |
422 | + arg, consumed = decint(byts) |
423 | + byts = byts[consumed:] |
424 | + ans.append('EOF (vwi: should be 0): %d'%arg) |
425 | + elif flags in (4, 5): |
426 | + num = byts[0] |
427 | + byts = byts[1:] |
428 | + ans.append('Number of article nodes in the record (byte): %d'%num) |
429 | + if flags == 5: |
430 | + arg, consumed = decint(byts) |
431 | + byts = byts[consumed:] |
432 | + ans.append('Unknown ??? (vwi)): %d'%(arg)) |
433 | + elif flags == 0: |
434 | + pass |
435 | + else: |
436 | + raise ValueError('Unknown flags: %d'%flags) |
437 | + return byts |
438 | + |
439 | + # }}} |
440 | + |
441 | + if tbs_type == 3: # {{{ |
442 | + arg2, consumed = decint(byts) |
443 | + byts = byts[consumed:] |
444 | + ans.append('Unknown (vwi: always 0?): %d'%arg2) |
445 | + |
446 | + arg3, consumed = decint(byts) |
447 | + byts = byts[consumed:] |
448 | + fsi = arg3 >> 4 |
449 | + extra = arg3 & 0b1111 |
450 | + ans.append('First section index (fvwi): %d'%fsi) |
451 | + psi = self.get_index(fsi) |
452 | + ans.append('Extra bits (flag: always 0?): %d'%extra) |
453 | + |
454 | + byts = tbs_type_6(byts, psi=psi, |
455 | + msg=('First article of ending section, relative to its' |
456 | + ' parent\'s index')) |
457 | + if byts: |
458 | + # We have a transition not just an opening first section |
459 | + psi = self.get_index(psi.index+1) |
460 | + arg, consumed = decint(byts) |
461 | + off = arg >> 4 |
462 | + byts = byts[consumed:] |
463 | + flags = arg & 0b1111 |
464 | + ans.append('Last article of ending section w.r.t. starting' |
465 | + ' section offset (fvwi): %d [%d absolute]'%(off, |
466 | + psi.index+off)) |
467 | + ans.append('Flags (always 8?): %d'%flags) |
468 | + byts = tbs_type_6(byts, psi=psi) |
469 | + # }}} |
470 | + |
471 | + elif tbs_type == 7: # {{{ |
472 | + # This occurs for records that have no section nodes and |
473 | + # whose parent section's index == 1 |
474 | + ans.append('Unknown (maybe vwi?): %r'%bytes(byts[:2])) |
475 | + byts = byts[2:] |
476 | + arg, consumed = decint(byts) |
477 | + byts = byts[consumed:] |
478 | + ai = arg >> 4 |
479 | + flags = arg & 0b1111 |
480 | + ans.append('Article at start of record (fvwi): %d'%ai) |
481 | + if flags == 4: |
482 | + num = byts[0] |
483 | + byts = byts[1:] |
484 | + ans.append('Number of articles in record (byte): %d'%num) |
485 | + elif flags == 0: |
486 | + pass |
487 | + elif flags == 1: |
488 | + arg, consumed = decint(byts) |
489 | + byts = byts[consumed:] |
490 | + ans.append('EOF (vwi: should be 0): %d'%arg) |
491 | + else: |
492 | + raise ValueError('Unknown flags value: %d'%flags) |
493 | + # }}} |
494 | + |
495 | + elif tbs_type == 6: # {{{ |
496 | + # This is used for records spanned by an article whose parent |
497 | + # section's index == 1 or for the opening record if it contains the |
498 | + # periodical start, section 1 start and at least one article. The |
499 | + # two cases are distinguished by the flags on the article index |
500 | + # vwi. |
501 | + unk = byts[0] |
502 | + byts = byts[1:] |
503 | + ans.append('Unknown (byte: always 2?): %d'%unk) |
504 | + byts = tbs_type_6(byts) |
505 | + # }}} |
506 | + |
507 | + elif tbs_type == 2: # {{{ |
508 | + # This occurs for records with no section nodes and whose parent |
509 | + # section's index != 1 (undefined (records before the first |
510 | + # section) or > 1) |
511 | + # This is also used for records that are spanned by an article |
512 | + # whose parent section index > 1. In this case the flags of the |
513 | + # vwi referring to the article at the start |
514 | + # of the record are set to 1 instead of 4. |
515 | + arg, consumed = decint(byts) |
516 | + byts = byts[consumed:] |
517 | + flags = (arg & 0b1111) |
518 | + psi = (arg >> 4) |
519 | + ans.append('Parent section index (fvwi): %d'%psi) |
520 | + psi = self.get_index(psi) |
521 | + ans.append('Flags: %d'%flags) |
522 | + if flags == 1: |
523 | + arg, consumed = decint(byts) |
524 | + byts = byts[consumed:] |
525 | + ans.append('Unknown (vwi?: always 0?): %d'%arg) |
526 | + byts = tbs_type_6(byts, psi=psi) |
527 | + elif flags == 0: |
528 | + byts = tbs_type_6(byts, psi=psi) |
529 | + else: |
530 | + raise ValueError('Unkown flags: %d'%flags) |
531 | + # }}} |
532 | + |
533 | + return byts, ans |
534 | + |
535 | +# }}} |
536 | + |
537 | class MOBIFile(object): # {{{ |
538 | |
539 | def __init__(self, stream): |
540 | @@ -874,6 +1153,9 @@ |
541 | else: |
542 | self.binary_records.append(BinaryRecord(i, r)) |
543 | |
544 | + if self.index_record is not None: |
545 | + self.tbs_indexing = TBSIndexing(self.text_records, |
546 | + self.index_record.indices, self.mobi_header.type_raw) |
547 | |
548 | def print_header(self, f=sys.stdout): |
549 | print (str(self.palmdb).encode('utf-8'), file=f) |
550 | @@ -905,6 +1187,9 @@ |
551 | print(str(f.cncx).encode('utf-8'), file=out) |
552 | print('\n\n', file=out) |
553 | print(str(f.index_record), file=out) |
554 | + with open(os.path.join(ddir, 'tbs_indexing.txt'), 'wb') as out: |
555 | + print(str(f.tbs_indexing), file=out) |
556 | + f.tbs_indexing.dump(ddir) |
557 | |
558 | for tdir, attr in [('text', 'text_records'), ('images', 'image_records'), |
559 | ('binary', 'binary_records')]: |
560 | |
561 | === added file 'src/calibre/ebooks/mobi/tbs_periodicals.rst' |
562 | --- src/calibre/ebooks/mobi/tbs_periodicals.rst 1970-01-01 00:00:00 +0000 |
563 | +++ src/calibre/ebooks/mobi/tbs_periodicals.rst 2011-07-22 18:03:36 +0000 |
564 | @@ -0,0 +1,189 @@ |
565 | +Reverse engineering the trailing byte sequences for hierarchical periodicals |
566 | +=============================================================================== |
567 | + |
568 | +In the following, *vwi* means variable width integer and *fvwi* means a vwi whose lowest four bits are used as a flag. |
569 | + |
570 | +Opening record |
571 | +---------------- |
572 | + |
573 | +The text record that contains the opening node for the periodical (depth=0 node in the NCX) can have TBS of 3 different forms: |
574 | + |
575 | + 1. If it has only the periodical node and no section/article nodes, TBS of type 2, like this:: |
576 | + |
577 | + Record #1: Starts at: 0 Ends at: 4095 |
578 | + Contains: 1 index entries (0 ends, 0 complete, 1 starts) |
579 | + TBS bytes: 82 80 |
580 | + Starts: |
581 | + Index Entry: 0 (Parent index: -1, Depth: 0, Offset: 215, Size: 68470) [j_x's Google reader] |
582 | + TBS Type: 010 (2) |
583 | + Outer Index entry: 0 |
584 | + Unknown (vwi: always 0?): 0 |
585 | + |
586 | + 2. A periodical and a section node, but no article nodes, TBS type of 6, like this:: |
587 | + |
588 | + Record #1: Starts at: 0 Ends at: 4095 |
589 | + Contains: 2 index entries (0 ends, 0 complete, 2 starts) |
590 | + TBS bytes: 86 80 2 |
591 | + Starts: |
592 | + Index Entry: 0 (Parent index: -1, Depth: 0, Offset: 215, Size: 93254) [j_x's Google reader] |
593 | + Index Entry: 1 (Parent index: 0, Depth: 1, Offset: 541, Size: 49280) [Ars Technica] |
594 | + TBS Type: 110 (6) |
595 | + Outer Index entry: 0 |
596 | + Unknown (vwi: always 0?): 0 |
597 | + Unknown (byte: always 2?): 2 |
598 | + |
599 | + 3. If it has both the section 1 node and at least one article node, TBS of type 6, like this:: |
600 | + |
601 | + Record #1: Starts at: 0 Ends at: 4095 |
602 | + Contains: 4 index entries (0 ends, 1 complete, 3 starts) |
603 | + TBS bytes: 86 80 2 c4 2 |
604 | + Complete: |
605 | + Index Entry: 5 (Parent index: 1, Depth: 2, Offset: 549, Size: 1866) [Week in gaming: 3DS review, Crysis 2, George Hotz] |
606 | + Starts: |
607 | + Index Entry: 0 (Parent index: -1, Depth: 0, Offset: 215, Size: 79253) [j_x's Google reader] |
608 | + Index Entry: 1 (Parent index: 0, Depth: 1, Offset: 541, Size: 35279) [Ars Technica] |
609 | + Index Entry: 6 (Parent index: 1, Depth: 2, Offset: 2415, Size: 2764) [Week in Apple: ZFS on Mac OS X, rogue tethering, DUI apps, and more] |
610 | + TBS Type: 110 (6) |
611 | + Outer Index entry: 0 |
612 | + Unknown (vwi: always 0?): 0 |
613 | + Unknown (byte: always 2?): 2 |
614 | + Article index at start of record or first article index, relative to parent section (fvwi): 4 [5 absolute] |
615 | + Number of article nodes in the record (byte): 2 |
616 | + |
617 | + If there was only a single article, instead of 2, then the last two bytes would be: c0, i.e. there would be no byte giving the number of articles in the record. |
618 | + |
619 | + |
620 | +Records with no nodes |
621 | +------------------------ |
622 | + |
623 | +These records are spanned by a single article. They are of two types: |
624 | + |
625 | + 1. If the parent section index is 1, TBS type of 6, like this:: |
626 | + |
627 | + Record #4: Starts at: 12288 Ends at: 16383 |
628 | + Contains: 0 index entries (0 ends, 0 complete, 0 starts) |
629 | + TBS bytes: 86 80 2 c1 80 |
630 | + TBS Type: 110 (6) |
631 | + Outer Index entry: 0 |
632 | + Unknown (vwi: always 0?): 0 |
633 | + Unknown (byte: always 2?): 2 |
634 | + Article index at start of record or first article index, relative to parent section (fvwi): 4 [5 absolute] |
635 | + EOF (vwi: should be 0): 0 |
636 | + |
637 | + If the record is before the first article, the TBS bytes would be: 86 80 2 |
638 | + |
639 | + 2. If the parent section index is > 1, TBS type of 2, like this:: |
640 | + |
641 | + Record #14: Starts at: 53248 Ends at: 57343 |
642 | + Contains: 0 index entries (0 ends, 0 complete, 0 starts) |
643 | + TBS bytes: 82 80 a0 1 e1 80 |
644 | + TBS Type: 010 (2) |
645 | + Outer Index entry: 0 |
646 | + Unknown (vwi: always 0?): 0 |
647 | + Parent section index (fvwi): 2 |
648 | + Flags: 0 |
649 | + Article index at start of record or first article index, relative to parent section (fvwi): 14 [16 absolute] |
650 | + EOF (vwi: should be 0): 0 |
651 | + |
652 | +Records with only article nodes |
653 | +----------------------------------- |
654 | + |
655 | +Such records have no section transitions (i.e. a section end/section start pair). They have only one or more article nodes. They are of two types: |
656 | + |
657 | + 1. If the parent section index is 1, TBS type of 7, like this:: |
658 | + |
659 | + Record #6: Starts at: 20480 Ends at: 24575 |
660 | + Contains: 2 index entries (1 ends, 0 complete, 1 starts) |
661 | + TBS bytes: 87 80 2 80 1 84 2 |
662 | + Ends: |
663 | + Index Entry: 9 (Parent index: 1, Depth: 2, Offset: 16453, Size: 4199) [Vaccine's success spurs whooping cough comeback] |
664 | + Starts: |
665 | + Index Entry: 10 (Parent index: 1, Depth: 2, Offset: 20652, Size: 4246) [Apple's mobile products do not violate Nokia patents, says ITC] |
666 | + TBS Type: 111 (7) |
667 | + Outer Index entry: 0 |
668 | + Unknown (vwi: always 0?): 0 |
669 | + Unknown: '\x02\x80' (vwi?: Always 256) |
670 | + Article at start of record (fvwi): 8 |
671 | + Number of articles in record (byte): 2 |
672 | + |
673 | + If there was only one article in the record, the last two bytes would be replaced by a single byte: 80 |
674 | + |
675 | + If this record is the first record with an article, then the article at the start of the record should be the last section index. At least, that's what kindlegen does, though if you ask me, it should be the first section index. |
676 | + |
677 | + |
678 | + 2. If the parent section index is > 1, TBS type of 2, like this:: |
679 | + |
680 | + Record #16: Starts at: 61440 Ends at: 65535 |
681 | + Contains: 5 index entries (1 ends, 3 complete, 1 starts) |
682 | + TBS bytes: 82 80 a1 80 1 f4 5 |
683 | + Ends: |
684 | + Index Entry: 17 (Parent index: 2, Depth: 2, Offset: 60920, Size: 1082) [Microsoft's Joe Belfiore still working on upcoming Zune hardware] |
685 | + Complete: |
686 | + Index Entry: 18 (Parent index: 2, Depth: 2, Offset: 62002, Size: 1016) [Rumour: OS X Lion nearing Golden Master stage] |
687 | + Index Entry: 19 (Parent index: 2, Depth: 2, Offset: 63018, Size: 1045) [iOS 4.3.1 released] |
688 | + Index Entry: 20 (Parent index: 2, Depth: 2, Offset: 64063, Size: 972) [Windows 8 'system reset' image leaks] |
689 | + Starts: |
690 | + Index Entry: 21 (Parent index: 2, Depth: 2, Offset: 65035, Size: 1057) [Windows Phone 7: Why it's failing] |
691 | + TBS Type: 010 (2) |
692 | + Outer Index entry: 0 |
693 | + Unknown (vwi: always 0?): 0 |
694 | + Parent section index (fvwi) : 2 |
695 | + Flags: 1 |
696 | + Unknown (vwi: always 0?): 0 |
697 | + Article index at start of record or first article index, relative to parent section (fvwi): 15 [17 absolute] |
698 | + Number of article nodes in the record (byte): 5 |
699 | + |
700 | + If there was only one article in the record, the last two bytes would be replaced by a single byte: f0 |
701 | + |
702 | +Records with a section transition |
703 | +----------------------------------- |
704 | + |
705 | +In such a record there is a transition from one section to the next. As such the record must have at least one article ending and one article starting, except in the case of the first section. |
706 | + |
707 | +TODO: Note you have to test the cases of first section, a single transition and multiple transitions. |
708 | + |
709 | + 1. The first section:: |
710 | + |
711 | + Record #2: Starts at: 4096 Ends at: 8191 |
712 | + Contains: 2 index entries (0 ends, 0 complete, 2 starts) |
713 | + TBS bytes: 83 80 80 90 c0 |
714 | + Starts: |
715 | + Index Entry: 1 (Parent index: 0, Depth: 1, Offset: 7758, Size: 26279) [Ars Technica] |
716 | + Index Entry: 5 (Parent index: 1, Depth: 2, Offset: 7766, Size: 1866) [Week in gaming: 3DS review, Crysis 2, George Hotz] |
717 | + TBS Type: 011 (3) |
718 | + Outer Index entry: 0 |
719 | + Unknown (vwi: always 0?): 0 |
720 | + Unknown (vwi: always 0?): 0 |
721 | + First section index (fvwi) : 1 |
722 | + Extra bits: 0 |
723 | + First section starts |
724 | + Article at start of block as offset from parent index (fvwi): 4 [5 absolute] |
725 | + Flags: 0 |
726 | + |
727 | + If there was more than one article at the start then the last byte would be replaced by: c4 n where n is the number of articles |
728 | + |
729 | + |
730 | +Ending record |
731 | +---------------- |
732 | + |
733 | +Logically, ending records must have at least one article ending, one section ending and the periodical ending. They are of TBS type 2, like this:: |
734 | + |
735 | + Record #17: Starts at: 65536 Ends at: 68684 |
736 | + Contains: 4 index entries (3 ends, 1 complete, 0 starts) |
737 | + TBS bytes: 82 80 c0 4 f4 2 |
738 | + Ends: |
739 | + Index Entry: 0 (Parent index: -1, Depth: 0, Offset: 215, Size: 68470) [j_x's Google reader] |
740 | + Index Entry: 4 (Parent index: 0, Depth: 1, Offset: 51234, Size: 17451) [Slashdot] |
741 | + Index Entry: 43 (Parent index: 4, Depth: 2, Offset: 65422, Size: 1717) [US ITC May Reverse Judge's Ruling In Kodak vs. Apple] |
742 | + Complete: |
743 | + Index Entry: 44 (Parent index: 4, Depth: 2, Offset: 67139, Size: 1546) [Google Starts Testing Google Music Internally] |
744 | + TBS Type: 010 (2) |
745 | + Outer Index entry: 0 |
746 | + Unknown (vwi: always 0?): 0 |
747 | + Parent section index (fvwi): 4 |
748 | + Flags: 0 |
749 | + Article at start of block as offset from parent index (fvwi): 39 [43 absolute] |
750 | + Number of nodes (byte): 2 |
751 | + |
752 | +If the record had only a single article end, the last two bytes would be replaced with: f0 |
753 | + |
754 | |
755 | === modified file 'src/calibre/ebooks/mobi/utils.py' |
756 | --- src/calibre/ebooks/mobi/utils.py 2011-07-20 20:01:41 +0000 |
757 | +++ src/calibre/ebooks/mobi/utils.py 2011-07-22 18:03:36 +0000 |
758 | @@ -79,7 +79,7 @@ |
759 | |
760 | def decint(raw, forward=True): |
761 | ''' |
762 | - Read a variable width integer from the bytestring raw and return the |
763 | + Read a variable width integer from the bytestring or bytearray raw and return the |
764 | integer and the number of bytes read. If forward is True bytes are read |
765 | from the start of raw, otherwise from the end of raw. |
766 | |
767 | @@ -88,8 +88,10 @@ |
768 | ''' |
769 | val = 0 |
770 | byts = bytearray() |
771 | - for byte in raw if forward else reversed(raw): |
772 | - bnum = ord(byte) |
773 | + src = bytearray(raw) |
774 | + if not forward: |
775 | + src.reverse() |
776 | + for bnum in src: |
777 | byts.append(bnum & 0b01111111) |
778 | if bnum & 0b10000000: |
779 | break |
780 | |
781 | === modified file 'src/calibre/gui2/store/stores/chitanka_plugin.py' |
782 | --- src/calibre/gui2/store/stores/chitanka_plugin.py 2011-07-19 06:16:55 +0000 |
783 | +++ src/calibre/gui2/store/stores/chitanka_plugin.py 2011-07-22 18:03:36 +0000 |
784 | @@ -55,36 +55,21 @@ |
785 | if counter <= 0: |
786 | break |
787 | |
788 | - id = ''.join(data.xpath('.//a[@class="booklink"]/@href')) |
789 | + id = ''.join(data.xpath('.//a[@class="booklink"]/@href')).strip() |
790 | if not id: |
791 | continue |
792 | |
793 | - cover_url = ''.join(data.xpath('.//a[@class="booklink"]/img/@src')) |
794 | - title = ''.join(data.xpath('.//a[@class="booklink"]/i/text()')) |
795 | - author = ''.join(data.xpath('.//span[@class="bookauthor"]/a/text()')) |
796 | - fb2 = ''.join(data.xpath('.//a[@class="dl dl-fb2"]/@href')) |
797 | - epub = ''.join(data.xpath('.//a[@class="dl dl-epub"]/@href')) |
798 | - txt = ''.join(data.xpath('.//a[@class="dl dl-txt"]/@href')) |
799 | - |
800 | - # remove .zip extensions |
801 | - if fb2.find('.zip') != -1: |
802 | - fb2 = fb2[:fb2.find('.zip')] |
803 | - if epub.find('.zip') != -1: |
804 | - epub = epub[:epub.find('.zip')] |
805 | - if txt.find('.zip') != -1: |
806 | - txt = txt[:txt.find('.zip')] |
807 | - |
808 | counter -= 1 |
809 | |
810 | s = SearchResult() |
811 | - s.cover_url = cover_url |
812 | - s.title = title.strip() |
813 | - s.author = author.strip() |
814 | - s.detail_item = id.strip() |
815 | + s.cover_url = ''.join(data.xpath('.//a[@class="booklink"]/img/@src')).strip() |
816 | + s.title = ''.join(data.xpath('.//a[@class="booklink"]/i/text()')).strip() |
817 | + s.author = ''.join(data.xpath('.//span[@class="bookauthor"]/a/text()')).strip() |
818 | + s.detail_item = id |
819 | s.drm = SearchResult.DRM_UNLOCKED |
820 | - s.downloads['FB2'] = base_url + fb2.strip() |
821 | - s.downloads['EPUB'] = base_url + epub.strip() |
822 | - s.downloads['TXT'] = base_url + txt.strip() |
823 | + s.downloads['FB2'] = base_url + ''.join(data.xpath('.//a[@class="dl dl-fb2"]/@href')).strip().replace('.zip', '') |
824 | + s.downloads['EPUB'] = base_url + ''.join(data.xpath('.//a[@class="dl dl-epub"]/@href')).strip().replace('.zip', '') |
825 | + s.downloads['TXT'] = base_url + ''.join(data.xpath('.//a[@class="dl dl-txt"]/@href')).strip().replace('.zip', '') |
826 | s.formats = 'FB2, EPUB, TXT, SFB' |
827 | yield s |
828 | |
829 | @@ -106,35 +91,20 @@ |
830 | if counter <= 0: |
831 | break |
832 | |
833 | - id = ''.join(data.xpath('.//a[@class="booklink"]/@href')) |
834 | + id = ''.join(data.xpath('.//a[@class="booklink"]/@href')).strip() |
835 | if not id: |
836 | continue |
837 | |
838 | - cover_url = ''.join(data.xpath('.//a[@class="booklink"]/img/@src')) |
839 | - title = ''.join(data.xpath('.//a[@class="booklink"]/i/text()')) |
840 | - author = ''.join(data.xpath('.//span[@class="bookauthor"]/a/text()')) |
841 | - fb2 = ''.join(data.xpath('.//a[@class="dl dl-fb2"]/@href')) |
842 | - epub = ''.join(data.xpath('.//a[@class="dl dl-epub"]/@href')) |
843 | - txt = ''.join(data.xpath('.//a[@class="dl dl-txt"]/@href')) |
844 | - |
845 | - # remove .zip extensions |
846 | - if fb2.find('.zip') != -1: |
847 | - fb2 = fb2[:fb2.find('.zip')] |
848 | - if epub.find('.zip') != -1: |
849 | - epub = epub[:epub.find('.zip')] |
850 | - if txt.find('.zip') != -1: |
851 | - txt = txt[:txt.find('.zip')] |
852 | - |
853 | counter -= 1 |
854 | |
855 | s = SearchResult() |
856 | - s.cover_url = cover_url |
857 | - s.title = title.strip() |
858 | - s.author = author.strip() |
859 | - s.detail_item = id.strip() |
860 | + s.cover_url = ''.join(data.xpath('.//a[@class="booklink"]/img/@src')).strip() |
861 | + s.title = ''.join(data.xpath('.//a[@class="booklink"]/i/text()')).strip() |
862 | + s.author = ''.join(data.xpath('.//span[@class="bookauthor"]/a/text()')).strip() |
863 | + s.detail_item = id |
864 | s.drm = SearchResult.DRM_UNLOCKED |
865 | - s.downloads['FB2'] = base_url + fb2.strip() |
866 | - s.downloads['EPUB'] = base_url + epub.strip() |
867 | - s.downloads['TXT'] = base_url + txt.strip() |
868 | + s.downloads['FB2'] = base_url + ''.join(data.xpath('.//a[@class="dl dl-fb2"]/@href')).strip().replace('.zip', '') |
869 | + s.downloads['EPUB'] = base_url + ''.join(data.xpath('.//a[@class="dl dl-epub"]/@href')).strip().replace('.zip', '') |
870 | + s.downloads['TXT'] = base_url + ''.join(data.xpath('.//a[@class="dl dl-txt"]/@href')).strip().replace('.zip', '') |
871 | s.formats = 'FB2, EPUB, TXT, SFB' |
872 | yield s |
873 | |
874 | === added file 'src/calibre/gui2/store/stores/eknigi_plugin.py' |
875 | --- src/calibre/gui2/store/stores/eknigi_plugin.py 1970-01-01 00:00:00 +0000 |
876 | +++ src/calibre/gui2/store/stores/eknigi_plugin.py 2011-07-22 18:03:36 +0000 |
877 | @@ -0,0 +1,88 @@ |
878 | +# -*- coding: utf-8 -*- |
879 | + |
880 | +from __future__ import (unicode_literals, division, absolute_import, print_function) |
881 | + |
882 | +__license__ = 'GPL 3' |
883 | +__copyright__ = '2011, Alex Stanev <alex@stanev.org>' |
884 | +__docformat__ = 'restructuredtext en' |
885 | + |
886 | +import random |
887 | +import urllib2 |
888 | +from contextlib import closing |
889 | + |
890 | +from lxml import html |
891 | + |
892 | +from PyQt4.Qt import QUrl |
893 | + |
894 | +from calibre import browser, url_slash_cleaner |
895 | +from calibre.gui2 import open_url |
896 | +from calibre.gui2.store import StorePlugin |
897 | +from calibre.gui2.store.basic_config import BasicStoreConfig |
898 | +from calibre.gui2.store.search_result import SearchResult |
899 | +from calibre.gui2.store.web_store_dialog import WebStoreDialog |
900 | + |
901 | +class eKnigiStore(BasicStoreConfig, StorePlugin): |
902 | + |
903 | + def open(self, parent=None, detail_item=None, external=False): |
904 | + # Use Kovid's affiliate id 30% of the time |
905 | + if random.randint(1, 10) in (1, 2, 3): |
906 | + aff_suffix = '&amigosid=23' |
907 | + else: |
908 | + aff_suffix = '&amigosid=22' |
909 | + url = 'http://e-knigi.net/?' + aff_suffix[1:] |
910 | + |
911 | + if external or self.config.get('open_external', False): |
912 | + if detail_item: |
913 | + url = detail_item + aff_suffix |
914 | + open_url(QUrl(url_slash_cleaner(url))) |
915 | + else: |
916 | + detail_url = None |
917 | + if detail_item: |
918 | + url = detail_item + aff_suffix |
919 | + d = WebStoreDialog(self.gui, url, parent, detail_url) |
920 | + d.setWindowTitle(self.name) |
921 | + d.set_tags(self.config.get('tags', '')) |
922 | + d.exec_() |
923 | + |
924 | + def search(self, query, max_results=10, timeout=60): |
925 | + base_url = 'http://e-knigi.net' |
926 | + url = base_url + '/virtuemart?page=shop.browse&search_category=0&search_limiter=anywhere&limitstart=0&limit=' + str(max_results) + '&keyword=' + urllib2.quote(query) |
927 | + |
928 | + br = browser() |
929 | + |
930 | + counter = max_results |
931 | + with closing(br.open(url, timeout=timeout)) as f: |
932 | + doc = html.fromstring(f.read()) |
933 | + |
934 | + # if the store finds only one product, it opens directly detail view |
935 | + for data in doc.xpath('//div[@class="prod_details"]'): |
936 | + s = SearchResult() |
937 | + s.cover_url = ''.join(data.xpath('.//div[@class="vm_main_info clearfix"]/div[@class="lf"]/a/img/@src')).strip() |
938 | + s.title = ''.join(data.xpath('.//div[@class="vm_main_info clearfix"]/div[@class="lf"]/a/img/@alt')).strip() |
939 | + s.author = ''.join(data.xpath('.//div[@class="td_bg clearfix"]/div[@class="gk_product_tab"]/div/table/tr[3]/td[2]/text()')).strip() |
940 | + s.price = ''.join(data.xpath('.//span[@class="productPrice"]/text()')).strip() |
941 | + s.detail_item = url |
942 | + s.drm = SearchResult.DRM_UNLOCKED |
943 | + |
944 | + yield s |
945 | + return |
946 | + |
947 | + # search in store results |
948 | + for data in doc.xpath('//div[@class="browseProductContainer"]'): |
949 | + if counter <= 0: |
950 | + break |
951 | + id = ''.join(data.xpath('.//a[1]/@href')).strip() |
952 | + if not id: |
953 | + continue |
954 | + |
955 | + counter -= 1 |
956 | + |
957 | + s = SearchResult() |
958 | + s.cover_url = ''.join(data.xpath('.//a[@class="gk_vm_product_image"]/img/@src')).strip() |
959 | + s.title = ''.join(data.xpath('.//a[@class="gk_vm_product_image"]/img/@title')).strip() |
960 | + s.author = ''.join(data.xpath('.//div[@style="float:left;width:90%"]/b/text()')).strip().replace('Автор: ', '') |
961 | + s.price = ''.join(data.xpath('.//span[@class="productPrice"]/text()')).strip() |
962 | + s.detail_item = base_url + id |
963 | + s.drm = SearchResult.DRM_UNLOCKED |
964 | + |
965 | + yield s |
966 | |
967 | === modified file 'src/calibre/translations/msgfmt.py' |
968 | --- src/calibre/translations/msgfmt.py 2008-05-02 16:41:12 +0000 |
969 | +++ src/calibre/translations/msgfmt.py 2011-07-22 18:03:36 +0000 |
970 | @@ -1,20 +1,39 @@ |
971 | #! /usr/bin/env python |
972 | # Written by Martin v. Loewis <loewis@informatik.hu-berlin.de> |
973 | -# Modified by Kovid Goyal <kovid@kovidgoyal.net> |
974 | |
975 | """Generate binary message catalog from textual translation description. |
976 | |
977 | This program converts a textual Uniforum-style message catalog (.po file) into |
978 | a binary GNU catalog (.mo file). This is essentially the same function as the |
979 | GNU msgfmt program, however, it is a simpler implementation. |
980 | + |
981 | +Usage: msgfmt.py [OPTIONS] filename.po |
982 | + |
983 | +Options: |
984 | + -o file |
985 | + --output-file=file |
986 | + Specify the output file to write to. If omitted, output will go to a |
987 | + file named filename.mo (based off the input file name). |
988 | + |
989 | + -h |
990 | + --help |
991 | + Print this message and exit. |
992 | + |
993 | + -V |
994 | + --version |
995 | + Display version information and exit. |
996 | """ |
997 | |
998 | import sys |
999 | import os |
1000 | +import getopt |
1001 | import struct |
1002 | import array |
1003 | |
1004 | -__version__ = "1.2" |
1005 | +__version__ = "1.1" |
1006 | + |
1007 | +MESSAGES = {} |
1008 | + |
1009 | |
1010 | def usage(code, msg=''): |
1011 | print >> sys.stderr, __doc__ |
1012 | @@ -23,16 +42,16 @@ |
1013 | sys.exit(code) |
1014 | |
1015 | |
1016 | - |
1017 | -def add(id, str, fuzzy, MESSAGES): |
1018 | +def add(id, str, fuzzy): |
1019 | "Add a non-fuzzy translation to the dictionary." |
1020 | + global MESSAGES |
1021 | if not fuzzy and str: |
1022 | MESSAGES[id] = str |
1023 | |
1024 | |
1025 | - |
1026 | -def generate(MESSAGES): |
1027 | +def generate(): |
1028 | "Return the generated output." |
1029 | + global MESSAGES |
1030 | keys = MESSAGES.keys() |
1031 | # the keys are sorted in the .mo file |
1032 | keys.sort() |
1033 | @@ -44,6 +63,7 @@ |
1034 | offsets.append((len(ids), len(id), len(strs), len(MESSAGES[id]))) |
1035 | ids += id + '\0' |
1036 | strs += MESSAGES[id] + '\0' |
1037 | + output = '' |
1038 | # The header is 7 32-bit unsigned integers. We don't use hash tables, so |
1039 | # the keys start right after the index tables. |
1040 | # translated string. |
1041 | @@ -71,9 +91,7 @@ |
1042 | return output |
1043 | |
1044 | |
1045 | - |
1046 | def make(filename, outfile): |
1047 | - MESSAGES = {} |
1048 | ID = 1 |
1049 | STR = 2 |
1050 | |
1051 | @@ -101,7 +119,7 @@ |
1052 | lno += 1 |
1053 | # If we get a comment line after a msgstr, this is a new entry |
1054 | if l[0] == '#' and section == STR: |
1055 | - add(msgid, msgstr, fuzzy, MESSAGES) |
1056 | + add(msgid, msgstr, fuzzy) |
1057 | section = None |
1058 | fuzzy = 0 |
1059 | # Record a fuzzy mark |
1060 | @@ -111,16 +129,39 @@ |
1061 | if l[0] == '#': |
1062 | continue |
1063 | # Now we are in a msgid section, output previous section |
1064 | - if l.startswith('msgid'): |
1065 | + if l.startswith('msgid') and not l.startswith('msgid_plural'): |
1066 | if section == STR: |
1067 | - add(msgid, msgstr, fuzzy, MESSAGES) |
1068 | + add(msgid, msgstr, fuzzy) |
1069 | section = ID |
1070 | l = l[5:] |
1071 | msgid = msgstr = '' |
1072 | + is_plural = False |
1073 | + # This is a message with plural forms |
1074 | + elif l.startswith('msgid_plural'): |
1075 | + if section != ID: |
1076 | +                    print >> sys.stderr, 'msgid_plural not preceded by msgid on %s:%d' %\
1077 | + (infile, lno) |
1078 | + sys.exit(1) |
1079 | + l = l[12:] |
1080 | + msgid += '\0' # separator of singular and plural |
1081 | + is_plural = True |
1082 | # Now we are in a msgstr section |
1083 | elif l.startswith('msgstr'): |
1084 | section = STR |
1085 | - l = l[6:] |
1086 | + if l.startswith('msgstr['): |
1087 | + if not is_plural: |
1088 | + print >> sys.stderr, 'plural without msgid_plural on %s:%d' %\ |
1089 | + (infile, lno) |
1090 | + sys.exit(1) |
1091 | + l = l.split(']', 1)[1] |
1092 | + if msgstr: |
1093 | + msgstr += '\0' # Separator of the various plural forms |
1094 | + else: |
1095 | + if is_plural: |
1096 | + print >> sys.stderr, 'indexed msgstr required for plural on %s:%d' %\ |
1097 | + (infile, lno) |
1098 | + sys.exit(1) |
1099 | + l = l[6:] |
1100 | # Skip empty lines |
1101 | l = l.strip() |
1102 | if not l: |
1103 | @@ -138,22 +179,40 @@ |
1104 | sys.exit(1) |
1105 | # Add last entry |
1106 | if section == STR: |
1107 | - add(msgid, msgstr, fuzzy, MESSAGES) |
1108 | + add(msgid, msgstr, fuzzy) |
1109 | |
1110 | # Compute output |
1111 | - output = generate(MESSAGES) |
1112 | - |
1113 | + output = generate() |
1114 | + |
1115 | + outfile.write(output) |
1116 | + |
1117 | + |
1118 | +def main(): |
1119 | try: |
1120 | - outfile.write(output) |
1121 | - except IOError,msg: |
1122 | - print >> sys.stderr, msg |
1123 | - |
1124 | - |
1125 | - |
1126 | -def main(outfile, args=sys.argv[1:]): |
1127 | + opts, args = getopt.getopt(sys.argv[1:], 'hVo:', |
1128 | + ['help', 'version', 'output-file=']) |
1129 | + except getopt.error, msg: |
1130 | + usage(1, msg) |
1131 | + |
1132 | + outfile = None |
1133 | + # parse options |
1134 | + for opt, arg in opts: |
1135 | + if opt in ('-h', '--help'): |
1136 | + usage(0) |
1137 | + elif opt in ('-V', '--version'): |
1138 | + print >> sys.stderr, "msgfmt.py", __version__ |
1139 | + sys.exit(0) |
1140 | + elif opt in ('-o', '--output-file'): |
1141 | + outfile = arg |
1142 | + # do it |
1143 | + if not args: |
1144 | + print >> sys.stderr, 'No input file given' |
1145 | + print >> sys.stderr, "Try `msgfmt --help' for more information." |
1146 | + return |
1147 | + |
1148 | for filename in args: |
1149 | make(filename, outfile) |
1150 | - return 0 |
1151 | + |
1152 | |
1153 | if __name__ == '__main__': |
1154 | - sys.exit(main(sys.stdout)) |
1155 | + main() |
1156 | |
1157 | === modified file 'src/calibre/utils/localization.py' |
1158 | --- src/calibre/utils/localization.py 2011-07-14 22:55:58 +0000 |
1159 | +++ src/calibre/utils/localization.py 2011-07-22 18:03:36 +0000 |
1160 | @@ -71,13 +71,13 @@ |
1161 | lang = get_lang() |
1162 | if lang: |
1163 | buf = iso639 = None |
1164 | - if os.access(lang+'.po', os.R_OK): |
1165 | + mpath = get_lc_messages_path(lang) |
1166 | + if mpath and os.access(mpath+'.po', os.R_OK): |
1167 | from calibre.translations.msgfmt import make |
1168 | buf = cStringIO.StringIO() |
1169 | - make(lang+'.po', buf) |
1170 | + make(mpath+'.po', buf) |
1171 | buf = cStringIO.StringIO(buf.getvalue()) |
1172 | |
1173 | - mpath = get_lc_messages_path(lang) |
1174 | if mpath is not None: |
1175 | with ZipFile(P('localization/locales.zip', |
1176 | allow_user_override=False), 'r') as zf: |
1177 | |
1178 | === modified file 'src/calibre/web/feeds/recipes/model.py' |
1179 | --- src/calibre/web/feeds/recipes/model.py 2011-06-25 04:47:59 +0000 |
1180 | +++ src/calibre/web/feeds/recipes/model.py 2011-07-22 18:03:36 +0000 |
1181 | @@ -217,6 +217,8 @@ |
1182 | self.all_urns.add(urn) |
1183 | if ok(urn): |
1184 | lang = x.get('language', 'und') |
1185 | + if lang: |
1186 | + lang = lang.replace('-', '_') |
1187 | if lang not in lang_map: |
1188 | lang_map[lang] = factory(NewsCategory, new_root, lang) |
1189 | factory(NewsItem, lang_map[lang], urn, x.get('title')) |