Merge lp:~realender/calibre/calibre into lp:~user-none/calibre/store
- calibre
- Merge into store
Status: | Merged |
---|---|
Merged at revision: | 8441 |
Proposed branch: | lp:~realender/calibre/calibre |
Merge into: | lp:~user-none/calibre/store |
Diff against target: |
363397 lines (+81812/-43845) 143 files modified
Changelog.yaml (+53/-1) recipes/freakonomics.recipe (+15/-11) recipes/nikkei_news.recipe (+88/-0) recipes/techcrunch.recipe (+63/-0) recipes/tijolaco.recipe (+24/-0) recipes/time_magazine.recipe (+58/-69) recipes/vio_mundo.recipe (+30/-0) recipes/wired_uk.recipe (+111/-36) recipes/zeitde.recipe (+6/-5) recipes/zeitde_sub.recipe (+136/-34) resources/default_tweaks.py (+7/-0) resources/templates/fb2.xsl (+6/-2) session.vim (+2/-2) setup/check.py (+6/-15) setup/translations.py (+27/-22) src/calibre/__init__.py (+2/-2) src/calibre/constants.py (+1/-1) src/calibre/customize/builtins.py (+11/-0) src/calibre/db/backend.py (+89/-6) src/calibre/db/cache.py (+377/-2) src/calibre/db/fields.py (+257/-0) src/calibre/db/locking.py (+153/-6) src/calibre/db/tables.py (+50/-8) src/calibre/db/view.py (+109/-0) src/calibre/devices/android/driver.py (+4/-2) src/calibre/devices/kobo/driver.py (+175/-170) src/calibre/devices/usbms/device.py (+7/-2) src/calibre/devices/usbms/driver.py (+23/-5) src/calibre/ebooks/__init__.py (+4/-2) src/calibre/ebooks/chardet/__init__.py (+5/-1) src/calibre/ebooks/conversion/cli.py (+3/-1) src/calibre/ebooks/conversion/plumber.py (+11/-3) src/calibre/ebooks/htmlz/input.py (+40/-6) src/calibre/ebooks/metadata/book/base.py (+1/-1) src/calibre/ebooks/mobi/debug.py (+170/-23) src/calibre/ebooks/mobi/output.py (+19/-11) src/calibre/ebooks/mobi/writer.py (+17/-13) src/calibre/ebooks/mobi/writer2/__init__.py (+15/-0) src/calibre/ebooks/mobi/writer2/main.py (+579/-0) src/calibre/ebooks/mobi/writer2/serializer.py (+246/-0) src/calibre/ebooks/oeb/base.py (+11/-4) src/calibre/ebooks/oeb/iterator.py (+1/-1) src/calibre/ebooks/oeb/transforms/flatcss.py (+2/-1) src/calibre/ebooks/oeb/transforms/guide.py (+3/-0) src/calibre/ebooks/oeb/transforms/htmltoc.py (+6/-2) src/calibre/gui2/__init__.py (+18/-4) src/calibre/gui2/actions/convert.py (+10/-3) src/calibre/gui2/actions/delete.py (+1/-1) src/calibre/gui2/actions/view.py (+2/-1) src/calibre/gui2/convert/look_and_feel.py 
(+4/-1) src/calibre/gui2/convert/look_and_feel.ui (+107/-82) src/calibre/gui2/convert/mobi_output.py (+1/-1) src/calibre/gui2/convert/mobi_output.ui (+13/-6) src/calibre/gui2/custom_column_widgets.py (+1/-0) src/calibre/gui2/dialogs/jobs.ui (+1/-4) src/calibre/gui2/jobs.py (+20/-11) src/calibre/gui2/library/delegates.py (+28/-3) src/calibre/gui2/metadata/basic_widgets.py (+68/-8) src/calibre/gui2/metadata/single.py (+3/-1) src/calibre/gui2/preferences/look_feel.py (+7/-2) src/calibre/gui2/preferences/toolbar.ui (+67/-79) src/calibre/gui2/store/stores/chitanka_plugin.py (+140/-0) src/calibre/gui2/tools.py (+14/-4) src/calibre/gui2/viewer/main.py (+7/-6) src/calibre/gui2/wizard/__init__.py (+10/-5) src/calibre/library/database2.py (+17/-0) src/calibre/library/save_to_disk.py (+3/-2) src/calibre/manual/faq.rst (+21/-3) src/calibre/manual/gui.rst (+2/-1) src/calibre/ptempfile.py (+11/-1) src/calibre/translations/af.po (+1029/-601) src/calibre/translations/ar.po (+1628/-916) src/calibre/translations/ast.po (+1029/-601) src/calibre/translations/az.po (+1029/-601) src/calibre/translations/bg.po (+1051/-604) src/calibre/translations/bn.po (+1029/-601) src/calibre/translations/br.po (+1029/-601) src/calibre/translations/bs.po (+1030/-601) src/calibre/translations/ca.po (+1381/-684) src/calibre/translations/calibre.pot (+276/-162) src/calibre/translations/cs.po (+1257/-650) src/calibre/translations/da.po (+1244/-644) src/calibre/translations/de.po (+1301/-662) src/calibre/translations/el.po (+1042/-606) src/calibre/translations/en_AU.po (+1029/-601) src/calibre/translations/en_CA.po (+1059/-617) src/calibre/translations/en_GB.po (+1417/-786) src/calibre/translations/eo.po (+1029/-601) src/calibre/translations/es.po (+1389/-692) src/calibre/translations/et.po (+1029/-601) src/calibre/translations/eu.po (+1248/-656) src/calibre/translations/fa.po (+1029/-601) src/calibre/translations/fi.po (+1034/-602) src/calibre/translations/fo.po (+1029/-601) src/calibre/translations/fr.po 
(+1449/-688) src/calibre/translations/gl.po (+1317/-674) src/calibre/translations/he.po (+1057/-607) src/calibre/translations/hi.po (+1029/-601) src/calibre/translations/hr.po (+1145/-632) src/calibre/translations/hu.po (+1207/-643) src/calibre/translations/id.po (+1029/-601) src/calibre/translations/it.po (+1272/-665) src/calibre/translations/ja.po (+1407/-679) src/calibre/translations/ko.po (+1154/-629) src/calibre/translations/lt.po (+1030/-601) src/calibre/translations/ltg.po (+1029/-601) src/calibre/translations/lv.po (+1034/-602) src/calibre/translations/ml.po (+1029/-601) src/calibre/translations/mr.po (+1029/-601) src/calibre/translations/ms.po (+1029/-601) src/calibre/translations/nb.po (+1256/-657) src/calibre/translations/nds.po (+1135/-631) src/calibre/translations/nl.po (+1381/-684) src/calibre/translations/oc.po (+1029/-601) src/calibre/translations/pa.po (+1029/-601) src/calibre/translations/pl.po (+1748/-712) src/calibre/translations/pt.po (+1213/-647) src/calibre/translations/pt_BR.po (+1189/-642) src/calibre/translations/ro.po (+1130/-624) src/calibre/translations/ru.po (+1187/-636) src/calibre/translations/sc.po (+1029/-601) src/calibre/translations/si.po (+1029/-601) src/calibre/translations/sk.po (+1097/-616) src/calibre/translations/sl.po (+1162/-628) src/calibre/translations/sq.po (+1033/-605) src/calibre/translations/sr.po (+1251/-651) src/calibre/translations/sv.po (+1336/-677) src/calibre/translations/ta.po (+1029/-601) src/calibre/translations/te.po (+1029/-601) src/calibre/translations/th.po (+1034/-602) src/calibre/translations/tr.po (+1171/-669) src/calibre/translations/uk.po (+1118/-619) src/calibre/translations/ur.po (+1029/-601) src/calibre/translations/vi.po (+1091/-620) src/calibre/translations/wa.po (+1029/-601) src/calibre/translations/yi.po (+1029/-601) src/calibre/translations/zh_CN.po (+1230/-638) src/calibre/translations/zh_HK.po (+1033/-605) src/calibre/translations/zh_TW.po (+1236/-639) src/calibre/utils/ipc/launch.py 
(+11/-2) src/calibre/utils/localization.py (+12/-12) src/calibre/utils/magick/draw.py (+2/-2) src/calibre/web/feeds/news.py (+3/-1) |
To merge this branch: | bzr merge lp:~realender/calibre/calibre |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Alex Stanev (community) | Needs Resubmitting | ||
John Schember | Needs Fixing | ||
Review via email: mp+68240@code.launchpad.net |
Commit message
chitanka.info search plugin
Description of the change
Adds a store plugin for chitanka.info, which offers DRM-free ebooks in fb2, epub, txt and sfb formats. The books are in Bulgarian.
- 9899. By Alex Stanev
-
Fixed compare and ordering
- 9900. By Alex Stanev
-
Fix whitespace
Alex Stanev (realender) wrote : | # |
Hi John,
1)
Chitanka has a slightly complicated structure for the works it hosts. It divides them into books, poems, productions, novels, etc. The plugin currently searches only the books type. "Червей под есенен вятър" is a single novel, so it is not in the results. The "alex" search returns works by Alexandre Dumas, among others. The plugin returns the right books, but additional filtering in Calibre itself prevents them from being shown, because chitanka matches "alex" in English but returns "Александър Дюма" in Bulgarian as the author. I believe that is OK for now.
2) and 3) fixed and pushed.
> There are a few issues:
>
> 1) Searches for "Червей под есенен вятър" and "alex" produce no results with
> this plugin. Searches for those queries on the web site produce results.
>
> 2) Please use != instead of <> as <> is obsolete.
>
> 3) Put the entries in builtins in alphabetical order.
Preview Diff
1 | === modified file 'Changelog.yaml' |
2 | --- Changelog.yaml 2011-07-08 17:01:37 +0000 |
3 | +++ Changelog.yaml 2011-07-19 06:18:03 +0000 |
4 | @@ -19,6 +19,58 @@ |
5 | # new recipes: |
6 | # - title: |
7 | |
8 | +- version: 0.8.10 |
9 | + date: 2011-07-15 |
10 | + |
11 | + new features: |
12 | + - title: "Add a right click menu to the cover browser. It allows you to view a book, edit metadata etc. from within the cover browser. The menu can be customized in Preferences->Toolbars" |
13 | + |
14 | + - title: "Allow selecting and stopping multiple jobs at once in the jobs window" |
15 | + tickets: [810349] |
16 | + |
17 | + - title: "When editing metadata directly in the book list, have a little pop up menu so that all existing values can be accessed by mouse only. For example, when you edit authors, you can use the mouse to select an existing author." |
18 | + |
19 | + - title: "Get Books: Add ebook.nl and fix price parsing for the legimi store" |
20 | + |
21 | + - title: "Drivers for Samsung Infuse and Motorola XPERT" |
22 | + |
23 | + - title: "Tag Browser: Make hierarchical items work in group searched terms." |
24 | + |
25 | + bug fixes: |
26 | + - title: "Allow setting numbers larger than 99 in custom series columns" |
27 | + |
28 | + - title: "Fix a bug that caused the same news download sent via a USB connection to the device on two different days resulting in a duplicate on the device" |
29 | + |
30 | + - title: "Ensure English in the list of interface languages in Preferences is always listed in English, so that it does not become hard to find" |
31 | + |
32 | + - title: "SNB Output: Fix bug in handling unicode file names" |
33 | + |
34 | + - title: "Fix sorting problem in manage categories. Fix poor performance problem when dropping multiple books onto a user category." |
35 | + |
36 | + - title: "Remove 'empty field' error dialogs in bulk search/replace, instead setting the fields to their default value." |
37 | + |
38 | + - title: "Fix regression that broke communicating with Kobo devices using outdated firmware" |
39 | + tickets: [807832] |
40 | + |
41 | + - title: "LRF Input: Fix conversion of LRF files with non ascii titles on some windows systems" |
42 | + tickets: [807641] |
43 | + |
44 | + improved recipes: |
45 | + - Time |
46 | + - Freakonomics Blog |
47 | + - io9 |
48 | + - "Computer Act!ve" |
49 | + |
50 | + new recipes: |
51 | + - title: Techcrunch and Pecat |
52 | + author: Darko Miletic |
53 | + |
54 | + - title: Vio Mundo, IDG Now and Tojolaco |
55 | + author: Diniz Bortoletto |
56 | + |
57 | + - title: Geek and Poke, Automatiseringgids IT |
58 | + author: DrMerry |
59 | + |
60 | - version: 0.8.9 |
61 | date: 2011-07-08 |
62 | |
63 | @@ -32,7 +84,7 @@ |
64 | - title: "Conversion pipeline: Add option to control if duplicate entries are allowed when generating the Table of Contents from links." |
65 | tickets: [806095] |
66 | |
67 | - - title: "Metadata download: When merging results, if the query to the xisbn service hangs, wait no more than 10 seconds. Also try harder to preserve the month when downlaoding published date. Do not throw away isbnless results if there are some sources that return isbns and some that do not." |
68 | + - title: "Metadata download: When merging results, if the query to the xisbn service hangs, wait no more than 10 seconds. Also try harder to preserve the month when downloading published date. Do not throw away isbnless results if there are some sources that return isbns and some that do not." |
69 | tickets: [798309] |
70 | |
71 | - title: "Get Books: Remove OpenLibrary since it has the same files as archive.org. Allow direct downloading from Project Gutenberg." |
72 | |
73 | === modified file 'recipes/freakonomics.recipe' |
74 | --- recipes/freakonomics.recipe 2010-05-23 17:29:13 +0000 |
75 | +++ recipes/freakonomics.recipe 2011-07-19 06:18:03 +0000 |
76 | @@ -1,25 +1,29 @@ |
77 | #!/usr/bin/env python |
78 | __license__ = 'GPL v3' |
79 | -__copyright__ = '2009, Kovid Goyal kovid@kovidgoyal.net' |
80 | +__copyright__ = '2011, Starson17' |
81 | __docformat__ = 'restructuredtext en' |
82 | |
83 | from calibre.web.feeds.news import BasicNewsRecipe |
84 | |
85 | class Freakonomics(BasicNewsRecipe): |
86 | - |
87 | title = 'Freakonomics Blog' |
88 | description = 'The Hidden side of everything' |
89 | - __author__ = 'Starson17' |
90 | + __author__ = 'Starson17' |
91 | + __version__ = '1.02' |
92 | + __date__ = '11 July 2011' |
93 | language = 'en' |
94 | cover_url = 'http://ilkerugur.files.wordpress.com/2009/04/freakonomics.jpg' |
95 | - |
96 | - feeds = [('Blog', 'http://feeds.feedburner.com/freakonomicsblog')] |
97 | - |
98 | - keep_only_tags = [dict(name='div', attrs={'id':'header'}), |
99 | - dict(name='h1'), |
100 | - dict(name='h2'), |
101 | - dict(name='div', attrs={'class':'entry-content'}), |
102 | - ] |
103 | + use_embedded_content= False |
104 | + no_stylesheets = True |
105 | + oldest_article = 30 |
106 | + remove_javascript = True |
107 | + remove_empty_feeds = True |
108 | + max_articles_per_feed = 50 |
109 | + |
110 | + feeds = [(u'Freakonomics Blog', u'http://www.freakonomics.com/feed/')] |
111 | + keep_only_tags = [dict(name='div', attrs={'id':['content']})] |
112 | + remove_tags_after = [dict(name='div', attrs={'class':['simple_socialmedia']})] |
113 | + remove_tags = [dict(name='div', attrs={'class':['simple_socialmedia','single-fb-share','wp-polls']})] |
114 | extra_css = ''' |
115 | h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} |
116 | h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;} |
117 | |
118 | === added file 'recipes/icons/techcrunch.png' |
119 | Binary files recipes/icons/techcrunch.png 1970-01-01 00:00:00 +0000 and recipes/icons/techcrunch.png 2011-07-19 06:18:03 +0000 differ |
120 | === added file 'recipes/nikkei_news.recipe' |
121 | --- recipes/nikkei_news.recipe 1970-01-01 00:00:00 +0000 |
122 | +++ recipes/nikkei_news.recipe 2011-07-19 06:18:03 +0000 |
123 | @@ -0,0 +1,88 @@ |
124 | +from calibre.web.feeds.recipes import BasicNewsRecipe |
125 | +import re |
126 | + |
127 | +#import pprint, sys |
128 | +#pp = pprint.PrettyPrinter(indent=4) |
129 | + |
130 | +class NikkeiNet_paper_subscription(BasicNewsRecipe): |
131 | + title = u'\u65E5\u672C\u7D4C\u6E08\u65B0\u805E\uFF08\u671D\u520A\u30FB\u5915\u520A\uFF09' |
132 | + __author__ = 'Ado Nishimura' |
133 | + description = u'\u65E5\u7D4C\u96FB\u5B50\u7248\u306B\u3088\u308B\u65E5\u672C\u7D4C\u6E08\u65B0\u805E\u3002\u671D\u520A\u30FB\u5915\u520A\u306F\u53D6\u5F97\u6642\u9593\u306B\u3088\u308A\u5207\u308A\u66FF\u308F\u308A\u307E\u3059\u3002\u8981\u8CFC\u8AAD' |
134 | + needs_subscription = True |
135 | + oldest_article = 1 |
136 | + max_articles_per_feed = 30 |
137 | + language = 'ja' |
138 | + no_stylesheets = True |
139 | + cover_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg' |
140 | + masthead_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg' |
141 | + |
142 | + remove_tags_before = {'class':"cmn-indent"} |
143 | + remove_tags = [ |
144 | +# {'class':"cmn-article_move"}, |
145 | +# {'class':"cmn-pr_list"}, |
146 | +# {'class':"cmnc-zoom"}, |
147 | + {'class':"cmn-hide"}, |
148 | + {'name':'form'}, |
149 | + ] |
150 | + remove_tags_after = {'class':"cmn-indent"} |
151 | + |
152 | + def get_browser(self): |
153 | + br = BasicNewsRecipe.get_browser() |
154 | + |
155 | + #pp.pprint(self.parse_index()) |
156 | + #exit(1) |
157 | + |
158 | + #br.set_debug_http(True) |
159 | + #br.set_debug_redirects(True) |
160 | + #br.set_debug_responses(True) |
161 | + |
162 | + if self.username is not None and self.password is not None: |
163 | + print "----------------------------open top page----------------------------------------" |
164 | + br.open('http://www.nikkei.com/') |
165 | + print "----------------------------open first login form--------------------------------" |
166 | + link = br.links(url_regex="www.nikkei.com/etc/accounts/login").next() |
167 | + br.follow_link(link) |
168 | + #response = br.response() |
169 | + #print response.get_data() |
170 | + print "----------------------------JS redirect(send autoPostForm)-----------------------" |
171 | + br.select_form(name='autoPostForm') |
172 | + br.submit() |
173 | + #response = br.response() |
174 | + print "----------------------------got login form---------------------------------------" |
175 | + br.select_form(name='LA0210Form01') |
176 | + br['LA0210Form01:LA0210Email'] = self.username |
177 | + br['LA0210Form01:LA0210Password'] = self.password |
178 | + br.submit() |
179 | + #response = br.response() |
180 | + print "----------------------------JS redirect------------------------------------------" |
181 | + br.select_form(nr=0) |
182 | + br.submit() |
183 | + |
184 | + #br.set_debug_http(False) |
185 | + #br.set_debug_redirects(False) |
186 | + #br.set_debug_responses(False) |
187 | + return br |
188 | + |
189 | + def cleanup(self): |
190 | + print "----------------------------logout-----------------------------------------------" |
191 | + self.browser.open('https://regist.nikkei.com/ds/etc/accounts/logout') |
192 | + |
193 | + def parse_index(self): |
194 | + print "----------------------------get index of paper-----------------------------------" |
195 | + result = [] |
196 | + soup = self.index_to_soup('http://www.nikkei.com/paper/') |
197 | + #soup = self.index_to_soup(self.test_data()) |
198 | + for sect in soup.findAll('div', 'cmn-section kn-special JSID_baseSection'): |
199 | + sect_title = sect.find('h3', 'cmnc-title').string |
200 | + sect_result = [] |
201 | + for elem in sect.findAll(attrs={'class':['cmn-article_title']}): |
202 | + url = 'http://www.nikkei.com' + elem.span.a['href'] |
203 | + url = re.sub("/article/", "/print-article/", url) # print version. |
204 | + span = elem.span.a.span |
205 | + if ((span is not None) and (len(span.contents) > 1)): |
206 | + title = span.contents[1].string |
207 | + sect_result.append(dict(title=title, url=url, date='', |
208 | + description='', content='')) |
209 | + result.append([sect_title, sect_result]) |
210 | + #pp.pprint(result) |
211 | + |
212 | |
213 | === added file 'recipes/techcrunch.recipe' |
214 | --- recipes/techcrunch.recipe 1970-01-01 00:00:00 +0000 |
215 | +++ recipes/techcrunch.recipe 2011-07-19 06:18:03 +0000 |
216 | @@ -0,0 +1,63 @@ |
217 | +__license__ = 'GPL v3' |
218 | +__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>' |
219 | +''' |
220 | +techcrunch.com |
221 | +''' |
222 | + |
223 | +from calibre.web.feeds.news import BasicNewsRecipe |
224 | + |
225 | +class TechCrunch(BasicNewsRecipe): |
226 | + title = 'TechCrunch' |
227 | + __author__ = 'Darko Miletic' |
228 | + description = 'IT News' |
229 | + publisher = 'AOL Inc.' |
230 | + category = 'news, IT' |
231 | + oldest_article = 2 |
232 | + max_articles_per_feed = 200 |
233 | + no_stylesheets = True |
234 | + encoding = 'utf8' |
235 | + use_embedded_content = False |
236 | + language = 'en' |
237 | + remove_empty_feeds = True |
238 | + publication_type = 'newsportal' |
239 | + masthead_url = 'http://s2.wp.com/wp-content/themes/vip/tctechcrunch2/images/site-logo.png' |
240 | + extra_css = """ |
241 | + body{font-family: Helvetica,Arial,sans-serif } |
242 | + img{margin-bottom: 0.4em; display:block} |
243 | + """ |
244 | + |
245 | + conversion_options = { |
246 | + 'comment' : description |
247 | + , 'tags' : category |
248 | + , 'publisher' : publisher |
249 | + , 'language' : language |
250 | + } |
251 | + |
252 | + remove_tags = [dict(name=['meta','link'])] |
253 | + remove_attributes=['lang'] |
254 | + keep_only_tags=[ |
255 | + dict(name='h1', attrs={'class':'headline'}) |
256 | + ,dict(attrs={'class':['author','post-time','body-copy']}) |
257 | + ] |
258 | + |
259 | + feeds = [(u'News', u'http://feeds.feedburner.com/TechCrunch/')] |
260 | + |
261 | + def preprocess_html(self, soup): |
262 | + for item in soup.findAll(style=True): |
263 | + del item['style'] |
264 | + for item in soup.findAll('a'): |
265 | + limg = item.find('img') |
266 | + if item.string is not None: |
267 | + str = item.string |
268 | + item.replaceWith(str) |
269 | + else: |
270 | + if limg: |
271 | + item.name = 'div' |
272 | + item.attrs = [] |
273 | + else: |
274 | + str = self.tag_to_string(item) |
275 | + item.replaceWith(str) |
276 | + for item in soup.findAll('img'): |
277 | + if not item.has_key('alt'): |
278 | + item['alt'] = 'image' |
279 | + return soup |
280 | |
281 | === added file 'recipes/tijolaco.recipe' |
282 | --- recipes/tijolaco.recipe 1970-01-01 00:00:00 +0000 |
283 | +++ recipes/tijolaco.recipe 2011-07-19 06:18:03 +0000 |
284 | @@ -0,0 +1,24 @@ |
285 | +from calibre.web.feeds.recipes import BasicNewsRecipe |
286 | + |
287 | +class Tijolaco(BasicNewsRecipe): |
288 | + title = u'Tijolaco.com' |
289 | + __author__ = u'Diniz Bortolotto' |
290 | + description = u'Posts do Blog Tijola\xe7o.com' |
291 | + oldest_article = 7 |
292 | + max_articles_per_feed = 50 |
293 | + encoding = 'utf8' |
294 | + publisher = u'Brizola Neto' |
295 | + category = 'politics, Brazil' |
296 | + language = 'pt_BR' |
297 | + publication_type = 'politics portal' |
298 | + use_embedded_content = False |
299 | + no_stylesheets = True |
300 | + remove_javascript = True |
301 | + |
302 | + feeds = [(u'Blog Tijola\xe7o.com', u'http://feeds.feedburner.com/Tijolacoblog')] |
303 | + |
304 | + reverse_article_order = True |
305 | + |
306 | + keep_only_tags = [dict(name='div', attrs={'class':'post'})] |
307 | + |
308 | + remove_tags = [dict(name='span', attrs={'class':'com'})] |
309 | |
310 | === modified file 'recipes/time_magazine.recipe' |
311 | --- recipes/time_magazine.recipe 2011-05-20 06:49:24 +0000 |
312 | +++ recipes/time_magazine.recipe 2011-07-19 06:18:03 +0000 |
313 | @@ -8,47 +8,33 @@ |
314 | |
315 | import re |
316 | from calibre.web.feeds.news import BasicNewsRecipe |
317 | +from lxml import html |
318 | |
319 | class Time(BasicNewsRecipe): |
320 | #recipe_disabled = ('This recipe has been disabled as TIME no longer' |
321 | # ' publish complete articles on the web.') |
322 | title = u'Time' |
323 | - __author__ = 'Kovid Goyal and Sujata Raman' |
324 | + __author__ = 'Kovid Goyal' |
325 | description = 'Weekly magazine' |
326 | encoding = 'utf-8' |
327 | no_stylesheets = True |
328 | language = 'en' |
329 | remove_javascript = True |
330 | |
331 | - extra_css = ''' h1 {font-family:georgia,serif;color:#000000;} |
332 | - .mainHd{font-family:georgia,serif;color:#000000;} |
333 | - h2 {font-family:Arial,Sans-serif;} |
334 | - .name{font-family:Arial,Sans-serif; font-size:x-small;font-weight:bold; } |
335 | - .date{font-family:Arial,Sans-serif; font-size:x-small ;color:#999999;} |
336 | - .byline{font-family:Arial,Sans-serif; font-size:x-small ;} |
337 | - .photoBkt{ font-size:x-small ;} |
338 | - .vertPhoto{font-size:x-small ;} |
339 | - .credits{font-family:Arial,Sans-serif; font-size:x-small ;color:gray;} |
340 | - .credit{font-family:Arial,Sans-serif; font-size:x-small ;color:gray;} |
341 | - .artTxt{font-family:georgia,serif;} |
342 | - #content{font-family:georgia,serif;} |
343 | - .caption{font-family:georgia,serif; font-size:x-small;color:#333333;} |
344 | - .credit{font-family:georgia,serif; font-size:x-small;color:#999999;} |
345 | - a:link{color:#CC0000;} |
346 | - .breadcrumb{font-family:Arial,Sans-serif;font-size:x-small;} |
347 | - ''' |
348 | - |
349 | - |
350 | - keep_only_tags = [ dict(name ="div",attrs = {"id" :["content"]}) , |
351 | - dict(name ="div",attrs = {"class" :["artHd","artTxt","photoBkt","vertPhoto","image","copy"]}) ,] |
352 | - remove_tags = [ dict(name ="div",attrs = {'class':['articleFooterNav','listsByTopic','articleTools2','relatedContent','sideContent','topBannerWrap','articlePagination','nextUp',"rtCol","pagination","enlarge","contentTools2",]}), |
353 | - dict(name ="span",attrs = {'class':['see']}), |
354 | - dict(name ="div",attrs = {'id':['header','articleSideBar',"articleTools","articleFooter","cmBotLt","quigoPackage"]}), |
355 | - dict(name ="a",attrs = {'class':['listLink']}), |
356 | - dict(name ="ul",attrs = {'id':['shareSocial','tabs']}), |
357 | - dict(name ="li",attrs = {'class':['back']}), |
358 | - dict(name ="ul",attrs = {'class':['navCount']}), |
359 | - ] |
360 | + |
361 | + keep_only_tags = [ |
362 | + { |
363 | + 'class':['artHd', 'articleContent', |
364 | + 'entry-title','entry-meta', 'entry-content', 'thumbnail'] |
365 | + }, |
366 | + ] |
367 | + remove_tags = [ |
368 | + {'class':['content-tools', 'quigo', 'see', |
369 | + 'first-tier-social-tools', 'navigation', 'enlarge lightbox']}, |
370 | + {'id':['share-tools']}, |
371 | + {'rel':'lightbox'}, |
372 | + ] |
373 | + |
374 | recursions = 10 |
375 | match_regexps = [r'/[0-9,]+-(2|3|4|5|6|7|8|9)(,\d+){0,1}.html',r'http://www.time.com/time/specials/packages/article/.*'] |
376 | |
377 | @@ -56,10 +42,11 @@ |
378 | r'<meta .+/>'), lambda m:'')] |
379 | |
380 | def parse_index(self): |
381 | - soup = self.index_to_soup('http://www.time.com/time/magazine') |
382 | - img = soup.find('a', title="View Large Cover", href=True) |
383 | - if img is not None: |
384 | - cover_url = 'http://www.time.com'+img['href'] |
385 | + raw = self.index_to_soup('http://www.time.com/time/magazine', raw=True) |
386 | + root = html.fromstring(raw) |
387 | + img = root.xpath('//a[.="View Large Cover" and @href]') |
388 | + if img: |
389 | + cover_url = 'http://www.time.com' + img[0].get('href') |
390 | try: |
391 | nsoup = self.index_to_soup(cover_url) |
392 | img = nsoup.find('img', src=re.compile('archive/covers')) |
393 | @@ -70,46 +57,48 @@ |
394 | |
395 | |
396 | feeds = [] |
397 | - parent = soup.find(id='tocGuts') |
398 | - for seched in parent.findAll(attrs={'class':'toc_seched'}): |
399 | - section = self.tag_to_string(seched).capitalize() |
400 | - articles = list(self.find_articles(seched)) |
401 | - feeds.append((section, articles)) |
402 | + parent = root.xpath('//div[@class="content-main-aside"]')[0] |
403 | + for sec in parent.xpath( |
404 | + 'descendant::section[contains(@class, "sec-mag-section")]'): |
405 | + h3 = sec.xpath('./h3') |
406 | + if h3: |
407 | + section = html.tostring(h3[0], encoding=unicode, |
408 | + method='text').strip().capitalize() |
409 | + self.log('Found section', section) |
410 | + articles = list(self.find_articles(sec)) |
411 | + if articles: |
412 | + feeds.append((section, articles)) |
413 | |
414 | return feeds |
415 | |
416 | - def find_articles(self, seched): |
417 | - for a in seched.findNextSiblings( attrs={'class':['toc_hed','rule2']}): |
418 | - if a.name in "div": |
419 | - break |
420 | - else: |
421 | - yield { |
422 | - 'title' : self.tag_to_string(a), |
423 | - 'url' : 'http://www.time.com'+a['href'], |
424 | - 'date' : '', |
425 | - 'description' : self.article_description(a) |
426 | - } |
427 | - |
428 | - |
429 | - |
430 | - def article_description(self, a): |
431 | - ans = [] |
432 | - while True: |
433 | - t = a.nextSibling |
434 | - if t is None: |
435 | - break |
436 | - a = t |
437 | - if getattr(t, 'name', False): |
438 | - if t.get('class', '') == 'toc_parens' or t.name == 'br': |
439 | - continue |
440 | - if t.name in ('div', 'a'): |
441 | - break |
442 | - ans.append(self.tag_to_string(t)) |
443 | - else: |
444 | - ans.append(unicode(t)) |
445 | - return u' '.join(ans).replace(u'\xa0', u'').strip() |
446 | + def find_articles(self, sec): |
447 | + |
448 | + for article in sec.xpath('./article'): |
449 | + h2 = article.xpath('./*[@class="entry-title"]') |
450 | + if not h2: continue |
451 | + a = h2[0].xpath('./a[@href]') |
452 | + if not a: continue |
453 | + title = html.tostring(a[0], encoding=unicode, |
454 | + method='text').strip() |
455 | + if not title: continue |
456 | + url = a[0].get('href') |
457 | + if url.startswith('/'): |
458 | + url = 'http://www.time.com'+url |
459 | + desc = '' |
460 | + p = article.xpath('./*[@class="entry-content"]') |
461 | + if p: |
462 | + desc = html.tostring(p[0], encoding=unicode, |
463 | + method='text') |
464 | + self.log('\t', title, ':\n\t\t', desc) |
465 | + yield { |
466 | + 'title' : title, |
467 | + 'url' : url, |
468 | + 'date' : '', |
469 | + 'description' : desc |
470 | + } |
471 | |
472 | def postprocess_html(self,soup,first): |
473 | for tag in soup.findAll(attrs ={'class':['artPag','pagination']}): |
474 | tag.extract() |
475 | return soup |
476 | + |
477 | |
478 | === added file 'recipes/vio_mundo.recipe' |
479 | --- recipes/vio_mundo.recipe 1970-01-01 00:00:00 +0000 |
480 | +++ recipes/vio_mundo.recipe 2011-07-19 06:18:03 +0000 |
481 | @@ -0,0 +1,30 @@ |
482 | +import re |
483 | +from calibre.web.feeds.news import BasicNewsRecipe |
484 | + |
485 | +class VioMundo(BasicNewsRecipe): |
486 | + title = 'Blog VioMundo' |
487 | + __author__ = 'Diniz Bortolotto' |
488 | + description = 'Posts do Blog VioMundo' |
489 | + publisher = 'Luiz Carlos Azenha' |
490 | + oldest_article = 5 |
491 | + max_articles_per_feed = 20 |
492 | + category = 'news, politics, Brazil' |
493 | + language = 'pt_BR' |
494 | + publication_type = 'news and politics portal' |
495 | + use_embedded_content = False |
496 | + no_stylesheets = True |
497 | + remove_javascript = True |
498 | + |
499 | + feeds = [(u'Blog VioMundo', u'http://www.viomundo.com.br/feed')] |
500 | + |
501 | + reverse_article_order = True |
502 | + |
503 | + def print_version(self, url): |
504 | + return url + '/print/' |
505 | + |
506 | + remove_tags_after = dict(id='BlogContent') |
507 | + |
508 | + preprocess_regexps = [ |
509 | + (re.compile(r'\|\ <u>.*</p>'), |
510 | + lambda match: '</p>') |
511 | + ] |
512 | |
513 | === modified file 'recipes/wired_uk.recipe' |
514 | --- recipes/wired_uk.recipe 2010-02-17 17:47:04 +0000 |
515 | +++ recipes/wired_uk.recipe 2011-07-19 06:18:03 +0000 |
516 | @@ -1,28 +1,29 @@ |
517 | - |
518 | __license__ = 'GPL v3' |
519 | -__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>' |
520 | +__copyright__ = '2011, Starson17 <Starson17 at gmail.com>' |
521 | ''' |
522 | www.wired.co.uk |
523 | ''' |
524 | |
525 | from calibre import strftime |
526 | from calibre.web.feeds.news import BasicNewsRecipe |
527 | +import re |
528 | |
529 | class Wired_UK(BasicNewsRecipe): |
530 | title = 'Wired Magazine - UK edition' |
531 | - __author__ = 'Darko Miletic' |
532 | + __author__ = 'Starson17' |
533 | + __version__ = 'v1.30' |
534 | + __date__ = '15 July 2011' |
535 | description = 'Gaming news' |
536 | publisher = 'Conde Nast Digital' |
537 | category = 'news, games, IT, gadgets' |
538 | - oldest_article = 32 |
539 | + oldest_article = 40 |
540 | max_articles_per_feed = 100 |
541 | no_stylesheets = True |
542 | encoding = 'utf-8' |
543 | use_embedded_content = False |
544 | - masthead_url = 'http://www.wired.co.uk/_/media/wired-logo_UK.gif' |
545 | + #masthead_url = 'http://www.wired.co.uk/_/media/wired-logo_UK.gif' |
546 | language = 'en_GB' |
547 | - extra_css = ' body{font-family: Palatino,"Palatino Linotype","Times New Roman",Times,serif} img{margin-bottom: 0.8em } .img-descr{font-family: Tahoma,Arial,Helvetica,sans-serif; font-size: 0.6875em; display: block} ' |
548 | - index = 'http://www.wired.co.uk/wired-magazine.aspx' |
549 | + index = 'http://www.wired.co.uk' |
550 | |
551 | conversion_options = { |
552 | 'comment' : description |
553 | @@ -31,44 +32,118 @@ |
554 | , 'language' : language |
555 | } |
556 | |
557 | - keep_only_tags = [dict(name='div', attrs={'class':'article-box'})] |
558 | - remove_tags = [ |
559 | - dict(name=['object','embed','iframe','link']) |
560 | - ,dict(attrs={'class':['opts','comment','stories']}) |
561 | - ] |
562 | - remove_tags_after = dict(name='div',attrs={'class':'stories'}) |
563 | + keep_only_tags = [dict(name='div', attrs={'class':['layoutColumn1']})] |
564 | + remove_tags = [dict(name='div',attrs={'class':['articleSidebar1','commentAddBox linkit','commentCountBox commentCountBoxBig']})] |
565 | + remove_tags_after = dict(name='div',attrs={'class':['mainCopy entry-content','mainCopy']}) |
566 | + ''' |
567 | remove_attributes = ['height','width'] |
568 | - |
569 | - |
570 | + ,dict(name=['object','embed','iframe','link']) |
571 | + ,dict(attrs={'class':['opts','comment','stories']}) |
572 | + ] |
573 | + ''' |
574 | def parse_index(self): |
575 | totalfeeds = [] |
576 | soup = self.index_to_soup(self.index) |
577 | - maincontent = soup.find('div',attrs={'class':'main-content'}) |
578 | + recentcontent = soup.find('ul',attrs={'class':'linkList3'}) |
579 | mfeed = [] |
580 | - if maincontent: |
581 | - st = maincontent.find(attrs={'class':'most-wired-box'}) |
582 | - if st: |
583 | - for itt in st.findAll('a',href=True): |
584 | - url = 'http://www.wired.co.uk' + itt['href'] |
585 | - title = self.tag_to_string(itt) |
586 | - description = '' |
587 | - date = strftime(self.timefmt) |
588 | - mfeed.append({ |
589 | - 'title' :title |
590 | - ,'date' :date |
591 | - ,'url' :url |
592 | - ,'description':description |
593 | - }) |
594 | - totalfeeds.append(('Articles', mfeed)) |
595 | + if recentcontent: |
596 | + for li in recentcontent.findAll('li'): |
597 | + a = li.h2.a |
598 | + url = self.index + a['href'] + '?page=all' |
599 | + title = self.tag_to_string(a) |
600 | + description = '' |
601 | + date = strftime(self.timefmt) |
602 | + mfeed.append({ |
603 | + 'title' :title |
604 | + ,'date' :date |
605 | + ,'url' :url |
606 | + ,'description':description |
607 | + }) |
608 | + totalfeeds.append(('Wired UK Magazine Latest News', mfeed)) |
609 | + popmagcontent = soup.findAll('div',attrs={'class':'sidebarLinkList'}) |
610 | + magcontent = popmagcontent[1] |
611 | + mfeed2 = [] |
612 | + if magcontent: |
613 | + a = magcontent.h3.a |
614 | + if a: |
615 | + url = self.index + a['href'] + '?page=all' |
616 | + title = self.tag_to_string(a) |
617 | + description = '' |
618 | + date = strftime(self.timefmt) |
619 | + mfeed2.append({ |
620 | + 'title' :title |
621 | + ,'date' :date |
622 | + ,'url' :url |
623 | + ,'description':description |
624 | + }) |
625 | + for li in magcontent.findAll('li'): |
626 | + a = li.a |
627 | + url = self.index + a['href'] + '?page=all' |
628 | + title = self.tag_to_string(a) |
629 | + description = '' |
630 | + date = strftime(self.timefmt) |
631 | + mfeed2.append({ |
632 | + 'title' :title |
633 | + ,'date' :date |
634 | + ,'url' :url |
635 | + ,'description':description |
636 | + }) |
637 | + totalfeeds.append(('Wired UK Magazine Features', mfeed2)) |
638 | + |
639 | + magsoup = self.index_to_soup(self.index + '/magazine') |
640 | + startcontent = magsoup.find('h3',attrs={'class':'magSubSectionTitle titleStart'}).parent |
641 | + mfeed3 = [] |
642 | + if startcontent: |
643 | + for li in startcontent.findAll('li'): |
644 | + a = li.a |
645 | + url = self.index + a['href'] + '?page=all' |
646 | + title = self.tag_to_string(a) |
647 | + description = '' |
648 | + date = strftime(self.timefmt) |
649 | + mfeed3.append({ |
650 | + 'title' :title |
651 | + ,'date' :date |
652 | + ,'url' :url |
653 | + ,'description':description |
654 | + }) |
655 | + totalfeeds.append(('Wired UK Magazine More', mfeed3)) |
656 | + |
657 | + playcontent = magsoup.find('h3',attrs={'class':'magSubSectionTitle titlePlay'}).parent |
658 | + mfeed4 = [] |
659 | + if playcontent: |
660 | + for li in playcontent.findAll('li'): |
661 | + a = li.a |
662 | + url = self.index + a['href'] + '?page=all' |
663 | + title = self.tag_to_string(a) |
664 | + description = '' |
665 | + date = strftime(self.timefmt) |
666 | + mfeed4.append({ |
667 | + 'title' :title |
668 | + ,'date' :date |
669 | + ,'url' :url |
670 | + ,'description':description |
671 | + }) |
672 | + totalfeeds.append(('Wired UK Magazine Play', mfeed4)) |
673 | return totalfeeds |
674 | |
675 | def get_cover_url(self): |
676 | - cover_url = None |
677 | - soup = self.index_to_soup(self.index) |
678 | - cover_item = soup.find('span', attrs={'class':'cover'}) |
679 | + cover_url = '' |
680 | + soup = self.index_to_soup(self.index + '/magazine/archive') |
681 | + cover_item = soup.find('div', attrs={'class':'image linkme'}) |
682 | if cover_item: |
683 | cover_url = cover_item.img['src'] |
684 | return cover_url |
685 | |
686 | - def print_version(self, url): |
687 | - return url + '?page=all' |
688 | + def preprocess_html(self, soup): |
689 | + for tag in soup.findAll(name='p'): |
690 | + if tag.find(name='span', text=re.compile(r'This article was taken from.*', re.DOTALL|re.IGNORECASE)): |
691 | + tag.extract() |
692 | + return soup |
693 | + |
694 | + extra_css = ''' |
695 | + h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;} |
696 | + h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;} |
697 | + p{font-family:Arial,Helvetica,sans-serif;font-size:small;} |
698 | + body{font-family:Helvetica,Arial,sans-serif;font-size:small;} |
699 | + ''' |
700 | + |
701 | |
702 | === modified file 'recipes/zeitde.recipe' |
703 | --- recipes/zeitde.recipe 2010-12-14 16:32:16 +0000 |
704 | +++ recipes/zeitde.recipe 2011-07-19 06:18:03 +0000 |
705 | @@ -15,15 +15,16 @@ |
706 | encoding = 'UTF-8' |
707 | |
708 | __author__ = 'Martin Pitt, Sujata Raman, Ingo Paschke and Marc Toensing' |
709 | + no_stylesheets = True |
710 | |
711 | max_articles_per_feed = 40 |
712 | |
713 | remove_tags = [ |
714 | - dict(name='iframe'), |
715 | - dict(name='div', attrs={'class':["response","pagination block","pagenav","inline link", "copyright"] }), |
716 | - dict(name='p', attrs={'class':["ressortbacklink", "copyright"] }), |
717 | - dict(name='div', attrs={'id':["place_5","place_4","comments"]}) |
718 | - ] |
719 | + dict(name='iframe'), |
720 | + dict(name='div', attrs={'class':["response","pagination block","pagenav","inline link", "copyright"] }), |
721 | + dict(name='p', attrs={'class':["ressortbacklink", "copyright"] }), |
722 | + dict(name='div', attrs={'id':["place_5","place_4","comments"]}) |
723 | + ] |
724 | |
725 | keep_only_tags = [dict(id=['main'])] |
726 | |
727 | |
728 | === modified file 'recipes/zeitde_sub.recipe' |
729 | --- recipes/zeitde_sub.recipe 2011-02-25 21:01:47 +0000 |
730 | +++ recipes/zeitde_sub.recipe 2011-07-19 06:18:03 +0000 |
731 | @@ -2,18 +2,21 @@ |
732 | # -*- coding: utf-8 mode: python -*- |
733 | |
734 | __license__ = 'GPL v3' |
735 | -__copyright__ = '2010-2011, Steffen Siebert <calibre at steffensiebert.de>' |
736 | +__copyright__ = '2010, Steffen Siebert <calibre at steffensiebert.de>' |
737 | __docformat__ = 'restructuredtext de' |
738 | -__version__ = '1.2' |
739 | +__version__ = '1.5' |
740 | |
741 | """ |
742 | Die Zeit EPUB |
743 | """ |
744 | |
745 | -import os, urllib2, zipfile, re |
746 | +import os, zipfile, re, cStringIO |
747 | from calibre.web.feeds.news import BasicNewsRecipe |
748 | from calibre.ptempfile import PersistentTemporaryFile |
749 | from calibre import walk |
750 | +from urlparse import urlparse |
751 | +from contextlib import closing |
752 | +from calibre.utils.magick.draw import save_cover_data_to |
753 | |
754 | class ZeitEPUBAbo(BasicNewsRecipe): |
755 | |
756 | @@ -22,49 +25,112 @@ |
757 | language = 'de' |
758 | lang = 'de-DE' |
759 | |
760 | - __author__ = 'Steffen Siebert and Tobias Isenberg' |
761 | + __author__ = 'Steffen Siebert, revised by Tobias Isenberg (with some code by Kovid Goyal)' |
762 | needs_subscription = True |
763 | |
764 | conversion_options = { |
765 | 'no_default_epub_cover' : True, |
766 | # fixing the wrong left margin |
767 | 'mobi_ignore_margins' : True, |
768 | + 'keep_ligatures' : True, |
769 | } |
770 | |
771 | preprocess_regexps = [ |
772 | - # filtering for correct dashes |
773 | - (re.compile(r' - '), lambda match: ' – '), # regular "Gedankenstrich" |
774 | - (re.compile(r' -,'), lambda match: ' –,'), # "Gedankenstrich" before a comma |
775 | - (re.compile(r'(?<=\d)-(?=\d)'), lambda match: '–'), # number-number |
776 | + # filtering for correct dashes ("Gedankenstrich" and "bis") |
777 | + (re.compile(u' (-|\u2212)(?=[ ,])'), lambda match: u' \u2013'), |
778 | + (re.compile(r'(?<=\d)-(?=\d)'), lambda match: u'\u2013'), # number-number |
779 | + (re.compile(u'(?<=\d,)-(?= ?\u20AC)'), lambda match: u'\u2013'), # ,- Euro |
780 | + # restore the hyphen in title image file names (NNNN-NN.png) that the number-number rule above turned into an en dash |
781 | + (re.compile(u'(?<=\d\d\d\d)\u2013(?=\d?\d\.png)'), lambda match: '-'), |
782 | + # filtering for certain dash cases |
783 | + (re.compile(r'Bild - Zeitung'), lambda match: 'Bild-Zeitung'), # the obvious |
784 | + (re.compile(r'EMail'), lambda match: 'E-Mail'), # the obvious |
785 | + (re.compile(r'SBahn'), lambda match: 'S-Bahn'), # the obvious |
786 | + (re.compile(r'UBoot'), lambda match: 'U-Boot'), # the obvious |
787 | + (re.compile(r'T Shirt'), lambda match: 'T-Shirt'), # the obvious |
788 | + (re.compile(r'TShirt'), lambda match: 'T-Shirt'), # the obvious |
789 | + # the next two lines not only fix errors but also create new ones. this is due to additional errors in |
790 | + # the typesetting such as missing commas or wrongly placed dashes. but more is fixed than broken. |
791 | + (re.compile(r'(?<!und|der|\w\w,) -(?=\w)'), lambda match: '-'), # space too much before a connecting dash |
792 | + (re.compile(r'(?<=\w)- (?!und\b|oder\b|wie\b|aber\b|auch\b|sondern\b|bis\b|&|&\s|bzw\.|auf\b|eher\b)'), lambda match: '-'), # space too much after a connecting dash |
793 | + # filtering for missing spaces before the month in long dates |
794 | + (re.compile(u'(?<=\d)\.(?=(Januar|Februar|M\u00E4rz|April|Mai|Juni|Juli|August|September|Oktober|November|Dezember))'), lambda match: '. '), |
795 | + # filtering for other missing spaces |
796 | + (re.compile(r'Stuttgart21'), lambda match: 'Stuttgart 21'), # the obvious |
797 | + (re.compile(u'(?<=\d)(?=\u20AC)'), lambda match: u'\u2013'), # Zahl[no space]Euro |
798 | + (re.compile(r':(?=[^\d\s</])'), lambda match: ': '), # missing space after colon |
799 | + (re.compile(u'\u00AB(?=[^\-\.:;,\?!<\)\s])'), lambda match: u'\u00AB '), # missing space after closing quotation |
800 | + (re.compile(u'(?<=[^\s\(>])\u00BB'), lambda match: u' \u00BB'), # missing space before opening quotation |
801 | + (re.compile(r'(?<=[a-z])(?=(I|II|III|IV|V|VI|VII|VIII|IX|X|XI|XII|XIII|XIV|XV|XVI|XVII|XVIII|XIX|XX)\.)'), lambda match: ' '), # missing space before Roman numeral |
802 | + (re.compile(r'(?<=(I|V|X)\.)(?=[\w])'), lambda match: ' '), # missing space after Roman numeral |
803 | + (re.compile(r'(?<=(II|IV|VI|IX|XI|XV|XX)\.)(?=[\w])'), lambda match: ' '), # missing space after Roman numeral |
804 | + (re.compile(r'(?<=(III|VII|XII|XIV|XVI|XIX)\.)(?=[\w])'), lambda match: ' '), # missing space after Roman numeral |
805 | + (re.compile(r'(?<=(VIII|XIII|XVII)\.)(?=[\w])'), lambda match: ' '), # missing space after Roman numeral |
806 | + (re.compile(r'(?<=(XVIII)\.)(?=[\w])'), lambda match: ' '), # missing space after Roman numeral |
807 | + (re.compile(r'(?<=[A-Za-zÄÖÜäöü]),(?=[A-Za-zÄÖÜäöü])'), lambda match: ', '), # missing space after comma |
808 | + (re.compile(r'(?<=[a-zäöü])\.(?=[A-ZÄÖÜ][A-Za-zÄÖÜäöü])'), lambda match: '. '), # missing space after full-stop |
809 | + (re.compile(r'(?<=[uU]\.) (?=a\.)'), lambda match: u'\u2008'), # fix abbreviation that was potentially broken previously |
810 | + (re.compile(r'(?<=[iI]\.) (?=A\.)'), lambda match: u'\u2008'), # fix abbreviation that was potentially broken previously |
811 | + (re.compile(r'(?<=[zZ]\.) (?=B\.)'), lambda match: u'\u2008'), # fix abbreviation that was potentially broken previously |
812 | + (re.compile(r'(?<=\w\.) (?=[A-Z][a-z]*@)'), lambda match: ''), # fix e-mail address that was potentially broken previously |
813 | + (re.compile(r'(?<=\d)[Pp]rozent'), lambda match: ' Prozent'), |
814 | + (re.compile(r'\.\.\.\.+'), lambda match: '...'), # too many dots (....) |
815 | + (re.compile(r'(?<=[^\s])\.\.\.'), lambda match: ' ...'), # spaces before ... |
816 | + (re.compile(r'\.\.\.(?=[^\s])'), lambda match: '... '), # spaces after ... |
817 | + (re.compile(r'(?<=[\[\(]) \.\.\. (?=[\]\)])'), lambda match: '...'), # fix special cases of ... in brackets |
818 | + (re.compile(u'(?<=[\u00BB\u203A]) \.\.\.'), lambda match: '...'), # fix special cases of ... after a quotation mark |
819 | + (re.compile(u'\.\.\. (?=[\u00AB\u2039,])'), lambda match: '...'), # fix special cases of ... before a quotation mark or comma |
820 | + # fix missing spaces between numbers and any sort of units, possibly with dot |
821 | + (re.compile(r'(?<=\d)(?=(Femto|Piko|Nano|Mikro|Milli|Zenti|Dezi|Hekto|Kilo|Mega|Giga|Tera|Peta|Tausend|Trilli|Kubik|Quadrat|Meter|Uhr|Jahr|Schuljahr|Seite))'), lambda match: ' '), |
822 | + (re.compile(r'(?<=\d\.)(?=(Femto|Piko|Nano|Mikro|Milli|Zenti|Dezi|Hekto|Kilo|Mega|Giga|Tera|Peta|Tausend|Trilli|Kubik|Quadrat|Meter|Uhr|Jahr|Schuljahr|Seite))'), lambda match: ' '), |
823 | + # fix wrong spaces |
824 | + (re.compile(r'(?<=<p class="absatz">[A-ZÄÖÜ]) (?=[a-zäöü\-])'), lambda match: ''), # at beginning of paragraphs |
825 | + (re.compile(u' \u00AB'), lambda match: u'\u00AB '), # before closing quotation |
826 | + (re.compile(u'\u00BB '), lambda match: u' \u00BB'), # after opening quotation |
827 | + # filtering for spaces in large numbers for better readability |
828 | + (re.compile(r'(?<=\d\d)(?=\d\d\d[ ,\.;\)<\?!-])'), lambda match: u'\u2008'), # end of the number with some character following |
829 | + (re.compile(r'(?<=\d\d)(?=\d\d\d. )'), lambda match: u'\u2008'), # end of the number with full-stop following, then space is necessary (avoid file names) |
830 | + (re.compile(u'(?<=\d)(?=\d\d\d\u2008)'), lambda match: u'\u2008'), # next level |
831 | + (re.compile(u'(?<=\d)(?=\d\d\d\u2008)'), lambda match: u'\u2008'), # next level |
832 | + (re.compile(u'(?<=\d)(?=\d\d\d\u2008)'), lambda match: u'\u2008'), # next level |
833 | + (re.compile(u'(?<=\d)(?=\d\d\d\u2008)'), lambda match: u'\u2008'), # next level |
834 | # filtering for unicode characters that are missing on the Kindle, |
835 | # try to replace them with meaningful work-arounds |
836 | - (re.compile(u'\u2080'), lambda match: '<span style="font-size: 50%;">0</span>'), # subscript-0 |
837 | - (re.compile(u'\u2081'), lambda match: '<span style="font-size: 50%;">1</span>'), # subscript-1 |
838 | - (re.compile(u'\u2082'), lambda match: '<span style="font-size: 50%;">2</span>'), # subscript-2 |
839 | - (re.compile(u'\u2083'), lambda match: '<span style="font-size: 50%;">3</span>'), # subscript-3 |
840 | - (re.compile(u'\u2084'), lambda match: '<span style="font-size: 50%;">4</span>'), # subscript-4 |
841 | - (re.compile(u'\u2085'), lambda match: '<span style="font-size: 50%;">5</span>'), # subscript-5 |
842 | - (re.compile(u'\u2086'), lambda match: '<span style="font-size: 50%;">6</span>'), # subscript-6 |
843 | - (re.compile(u'\u2087'), lambda match: '<span style="font-size: 50%;">7</span>'), # subscript-7 |
844 | - (re.compile(u'\u2088'), lambda match: '<span style="font-size: 50%;">8</span>'), # subscript-8 |
845 | - (re.compile(u'\u2089'), lambda match: '<span style="font-size: 50%;">9</span>'), # subscript-9 |
846 | + (re.compile(u'\u2080'), lambda match: '<span style="font-size: 40%;">0</span>'), # subscript-0 |
847 | + (re.compile(u'\u2081'), lambda match: '<span style="font-size: 40%;">1</span>'), # subscript-1 |
848 | + (re.compile(u'\u2082'), lambda match: '<span style="font-size: 40%;">2</span>'), # subscript-2 |
849 | + (re.compile(u'\u2083'), lambda match: '<span style="font-size: 40%;">3</span>'), # subscript-3 |
850 | + (re.compile(u'\u2084'), lambda match: '<span style="font-size: 40%;">4</span>'), # subscript-4 |
851 | + (re.compile(u'\u2085'), lambda match: '<span style="font-size: 40%;">5</span>'), # subscript-5 |
852 | + (re.compile(u'\u2086'), lambda match: '<span style="font-size: 40%;">6</span>'), # subscript-6 |
853 | + (re.compile(u'\u2087'), lambda match: '<span style="font-size: 40%;">7</span>'), # subscript-7 |
854 | + (re.compile(u'\u2088'), lambda match: '<span style="font-size: 40%;">8</span>'), # subscript-8 |
855 | + (re.compile(u'\u2089'), lambda match: '<span style="font-size: 40%;">9</span>'), # subscript-9 |
856 | + # always change CO2 |
857 | + (re.compile(r'CO2'), lambda match: 'CO<span style="font-size: 40%;">2</span>'), # CO2 |
858 | + # remove *** paragraphs |
859 | + (re.compile(r'<p class="absatz">\*\*\*</p>'), lambda match: ''), |
860 | + # better layout for the top line of each article |
861 | + (re.compile(u'(?<=DIE ZEIT N\u00B0 \d /) (?=\d\d)'), lambda match: ' 20'), # proper year in edition number |
862 | + (re.compile(u'(?<=DIE ZEIT N\u00B0 \d\d /) (?=\d\d)'), lambda match: ' 20'), # proper year in edition number |
863 | + (re.compile(u'(?<=>)(?=DIE ZEIT N\u00B0 \d\d / 20\d\d)'), lambda match: u' \u2014 '), # m-dash between category and DIE ZEIT |
864 | ] |
865 | |
866 | def build_index(self): |
867 | - domain = "http://premium.zeit.de" |
868 | - url = domain + "/abovorteile/cgi-bin/_er_member/p4z.fpl?ER_Do=getUserData&ER_NextTemplate=login_ok" |
869 | - |
870 | + domain = "https://premium.zeit.de" |
871 | + url = domain + "/abo/zeit_digital" |
872 | browser = self.get_browser() |
873 | - browser.add_password("http://premium.zeit.de", self.username, self.password) |
874 | - |
875 | - try: |
876 | - browser.open(url) |
877 | - except urllib2.HTTPError: |
878 | - self.report_progress(0,_("Can't login to download issue")) |
879 | - raise ValueError('Failed to login, check your username and password') |
880 | - |
881 | - response = browser.follow_link(text="DIE ZEIT als E-Paper") |
882 | - response = browser.follow_link(url_regex=re.compile('^http://contentserver.hgv-online.de/nodrm/fulfillment\\?distributor=zeit-online&orderid=zeit_online.*')) |
883 | + |
884 | + # new login process |
885 | + response = browser.open(url) |
886 | + browser.select_form(nr=2) |
887 | + browser.form['name']=self.username |
888 | + browser.form['pass']=self.password |
889 | + browser.submit() |
890 | + # now find the correct file, we will still use the ePub file |
891 | + epublink = browser.find_link(text_regex=re.compile('.*Ausgabe als Datei im ePub-Format.*')) |
892 | + response = browser.follow_link(epublink) |
893 | + self.report_progress(1,_('next step')) |
894 | |
895 | tmp = PersistentTemporaryFile(suffix='.epub') |
896 | self.report_progress(0,_('downloading epub')) |
897 | @@ -104,9 +170,45 @@ |
898 | |
899 | # getting url of the cover |
900 | def get_cover_url(self): |
901 | + self.log.warning('Downloading cover') |
902 | try: |
903 | - inhalt = self.index_to_soup('http://www.zeit.de/inhalt') |
904 | - cover_url = inhalt.find('div', attrs={'class':'singlearchive clearfix'}).img['src'].replace('icon_','') |
905 | + self.log.warning('Trying PDF-based cover') |
906 | + domain = "https://premium.zeit.de" |
907 | + url = domain + "/abo/zeit_digital" |
908 | + browser = self.get_browser() |
909 | + |
910 | + # new login process |
911 | + browser.open(url) |
912 | + browser.select_form(nr=2) |
913 | + browser.form['name']=self.username |
914 | + browser.form['pass']=self.password |
915 | + browser.submit() |
916 | + # actual cover search |
917 | + pdflink = browser.find_link(url_regex=re.compile('system/files/epaper/DZ/pdf/DZ_ePaper*')) |
918 | + cover_url = urlparse(pdflink.base_url)[0]+'://'+urlparse(pdflink.base_url)[1]+''+(urlparse(pdflink.url)[2]).replace('ePaper_','').replace('.pdf','_001.pdf') |
919 | + self.log.warning('PDF link found:') |
920 | + self.log.warning(cover_url) |
921 | + # download the cover (has to be here due to new login process) |
922 | + with closing(browser.open(cover_url)) as r: |
923 | + cdata = r.read() |
924 | + from calibre.ebooks.metadata.pdf import get_metadata |
925 | + stream = cStringIO.StringIO(cdata) |
926 | + cdata = None |
927 | + mi = get_metadata(stream) |
928 | + if mi.cover_data and mi.cover_data[1]: |
929 | + cdata = mi.cover_data[1] |
930 | + |
931 | + cpath = os.path.join(self.output_dir, 'cover.jpg') |
932 | + save_cover_data_to(cdata, cpath) |
933 | + cover_url = cpath |
934 | + |
935 | except: |
936 | - cover_url = 'http://images.zeit.de/bilder/titelseiten_zeit/1946/001_001.jpg' |
937 | + self.log.warning('Trying low-res cover') |
938 | + try: |
939 | + inhalt = self.index_to_soup('http://www.zeit.de/inhalt') |
940 | + cover_url = inhalt.find('div', attrs={'class':'singlearchive clearfix'}).img['src'].replace('icon_','') |
941 | + except: |
942 | + self.log.warning('Using static old low-res cover') |
943 | + cover_url = 'http://images.zeit.de/bilder/titelseiten_zeit/1946/001_001.jpg' |
944 | return cover_url |
945 | + |
946 | |
947 | === modified file 'resources/default_tweaks.py' |
948 | --- resources/default_tweaks.py 2011-06-30 16:38:28 +0000 |
949 | +++ resources/default_tweaks.py 2011-07-19 06:18:03 +0000 |
950 | @@ -366,3 +366,10 @@ |
951 | # on at your own risk! |
952 | unified_title_toolbar_on_osx = False |
953 | |
954 | +#: Save original file when converting from same format to same format |
955 | +# When calibre does a conversion from the same format to the same format, for |
956 | +# example, from EPUB to EPUB, the original file is saved, so that in case the |
957 | +# conversion is poor, you can tweak the settings and run it again. By setting |
958 | +# this to False you can prevent calibre from saving the original file. |
959 | +save_original_format = True |
960 | + |
961 | |
962 | === modified file 'resources/images/devices/kindle.jpg' |
963 | Binary files resources/images/devices/kindle.jpg 2009-12-20 01:18:13 +0000 and resources/images/devices/kindle.jpg 2011-07-19 06:18:03 +0000 differ |
964 | === modified file 'resources/templates/fb2.xsl' |
965 | --- resources/templates/fb2.xsl 2011-06-11 15:27:21 +0000 |
966 | +++ resources/templates/fb2.xsl 2011-07-19 06:18:03 +0000 |
967 | @@ -379,7 +379,8 @@ |
968 | <!-- image --> |
969 | <xsl:template match="fb:image"> |
970 | <div align="center"> |
971 | - <img border="1"> |
972 | + <xsl:element name="img"> |
973 | + <xsl:attribute name="border">1</xsl:attribute> |
974 | <xsl:choose> |
975 | <xsl:when test="starts-with(@xlink:href,'#')"> |
976 | <xsl:attribute name="src"><xsl:value-of select="substring-after(@xlink:href,'#')"/></xsl:attribute> |
977 | @@ -388,7 +389,10 @@ |
978 | <xsl:attribute name="src"><xsl:value-of select="@xlink:href"/></xsl:attribute> |
979 | </xsl:otherwise> |
980 | </xsl:choose> |
981 | - </img> |
982 | + <xsl:if test="@title"> |
983 | + <xsl:attribute name="title"><xsl:value-of select="@title"/></xsl:attribute> |
984 | + </xsl:if> |
985 | + </xsl:element> |
986 | </div> |
987 | </xsl:template> |
988 | </xsl:stylesheet> |
989 | |
990 | === modified file 'session.vim' |
991 | --- session.vim 2011-07-10 19:29:15 +0000 |
992 | +++ session.vim 2011-07-19 06:18:03 +0000 |
993 | @@ -1,5 +1,5 @@ |
994 | " Project wide builtins |
995 | -let g:pyflakes_builtins += ["dynamic_property", "__", "P", "I", "lopen", "icu_lower", "icu_upper", "icu_title", "ngettext"] |
996 | +let g:pyflakes_builtins = ["_", "dynamic_property", "__", "P", "I", "lopen", "icu_lower", "icu_upper", "icu_title", "ngettext"] |
997 | |
998 | python << EOFPY |
999 | import os |
1000 | @@ -15,7 +15,7 @@ |
1001 | project_dir=project_dir, base_dir=base_dir) |
1002 | |
1003 | def recipe_title_callback(raw): |
1004 | - return eval(raw.decode('utf-8')) |
1005 | + return eval(raw.decode('utf-8')).replace(' ', '_') |
1006 | |
1007 | vipy.session.add_content_browser('.r', ',r', 'Recipe', |
1008 | vipy.session.glob_based_iterator(os.path.join(project_dir, 'recipes', '*.recipe')), |
1009 | |
1010 | === modified file 'setup/check.py' |
1011 | --- setup/check.py 2011-07-10 19:29:15 +0000 |
1012 | +++ setup/check.py 2011-07-19 06:18:03 +0000 |
1013 | @@ -25,18 +25,11 @@ |
1014 | return '%s:%s: %s'%(self.filename, self.lineno, self.msg) |
1015 | |
1016 | def check_for_python_errors(code_string, filename): |
1017 | - # Since compiler.parse does not reliably report syntax errors, use the |
1018 | - # built in compiler first to detect those. |
1019 | + import _ast |
1020 | + # First, compile into an AST and handle syntax errors. |
1021 | try: |
1022 | - try: |
1023 | - compile(code_string, filename, "exec") |
1024 | - except MemoryError: |
1025 | - # Python 2.4 will raise MemoryError if the source can't be |
1026 | - # decoded. |
1027 | - if sys.version_info[:2] == (2, 4): |
1028 | - raise SyntaxError(None) |
1029 | - raise |
1030 | - except (SyntaxError, IndentationError), value: |
1031 | + tree = compile(code_string, filename, "exec", _ast.PyCF_ONLY_AST) |
1032 | + except (SyntaxError, IndentationError) as value: |
1033 | msg = value.args[0] |
1034 | |
1035 | (lineno, offset, text) = value.lineno, value.offset, value.text |
1036 | @@ -47,13 +40,11 @@ |
1037 | # bogus message that claims the encoding the file declared was |
1038 | # unknown. |
1039 | msg = "%s: problem decoding source" % filename |
1040 | + |
1041 | return [Message(filename, lineno, msg)] |
1042 | else: |
1043 | - # Okay, it's syntactically valid. Now parse it into an ast and check |
1044 | - # it. |
1045 | - import compiler |
1046 | checker = __import__('pyflakes.checker').checker |
1047 | - tree = compiler.parse(code_string) |
1048 | + # Okay, it's syntactically valid. Now check it. |
1049 | w = checker.Checker(tree, filename) |
1050 | w.messages.sort(lambda a, b: cmp(a.lineno, b.lineno)) |
1051 | return [Message(x.filename, x.lineno, x.message%x.message_args) for x in |
1052 | |
1053 | === modified file 'setup/translations.py' |
1054 | --- setup/translations.py 2011-07-10 19:29:15 +0000 |
1055 | +++ setup/translations.py 2011-07-19 06:18:03 +0000 |
1056 | @@ -8,11 +8,18 @@ |
1057 | |
1058 | import os, tempfile, shutil, subprocess, glob, re, time, textwrap |
1059 | from distutils import sysconfig |
1060 | +from functools import partial |
1061 | |
1062 | from setup import Command, __appname__, __version__ |
1063 | -from setup.build_environment import pyqt |
1064 | - |
1065 | -class POT(Command): |
1066 | + |
1067 | +def qt_sources(): |
1068 | + qtdir = glob.glob('/usr/src/qt-*')[-1] |
1069 | + j = partial(os.path.join, qtdir) |
1070 | + return list(map(j, [ |
1071 | + 'src/gui/widgets/qdialogbuttonbox.cpp', |
1072 | + ])) |
1073 | + |
1074 | +class POT(Command): # {{{ |
1075 | |
1076 | description = 'Update the .pot translation template' |
1077 | PATH = os.path.join(Command.SRC, __appname__, 'translations') |
1078 | @@ -82,6 +89,8 @@ |
1079 | time=time.strftime('%Y-%m-%d %H:%M+%Z')) |
1080 | |
1081 | files = self.source_files() |
1082 | + qt_inputs = qt_sources() |
1083 | + |
1084 | with tempfile.NamedTemporaryFile() as fl: |
1085 | fl.write('\n'.join(files)) |
1086 | fl.flush() |
1087 | @@ -91,8 +100,14 @@ |
1088 | subprocess.check_call(['xgettext', '-f', fl.name, |
1089 | '--default-domain=calibre', '-o', out.name, '-L', 'Python', |
1090 | '--from-code=UTF-8', '--sort-by-file', '--omit-header', |
1091 | - '--no-wrap', '-k__', |
1092 | + '--no-wrap', '-k__', '--add-comments=NOTE:', |
1093 | ]) |
1094 | + subprocess.check_call(['xgettext', '-j', |
1095 | + '--default-domain=calibre', '-o', out.name, |
1096 | + '--from-code=UTF-8', '--sort-by-file', '--omit-header', |
1097 | + '--no-wrap', '-kQT_TRANSLATE_NOOP:2', |
1098 | + ] + qt_inputs) |
1099 | + |
1100 | with open(out.name, 'rb') as f: |
1101 | src = f.read() |
1102 | os.remove(out.name) |
1103 | @@ -102,10 +117,12 @@ |
1104 | with open(pot, 'wb') as f: |
1105 | f.write(src) |
1106 | self.info('Translations template:', os.path.abspath(pot)) |
1107 | - return pot |
1108 | - |
1109 | - |
1110 | -class Translations(POT): |
1111 | + |
1112 | + |
1113 | + return pot |
1114 | +# }}} |
1115 | + |
1116 | +class Translations(POT): # {{{ |
1117 | description='''Compile the translations''' |
1118 | DEST = os.path.join(os.path.dirname(POT.SRC), 'resources', 'localization', |
1119 | 'locales') |
1120 | @@ -117,7 +134,6 @@ |
1121 | locale = os.path.splitext(os.path.basename(po_file))[0] |
1122 | return locale, os.path.join(self.DEST, locale, 'messages.mo') |
1123 | |
1124 | - |
1125 | def run(self, opts): |
1126 | for f in self.po_files(): |
1127 | locale, dest = self.mo_file(f) |
1128 | @@ -126,7 +142,7 @@ |
1129 | os.makedirs(base) |
1130 | self.info('\tCompiling translations for', locale) |
1131 | subprocess.check_call(['msgfmt', '-o', dest, f]) |
1132 | - if locale in ('en_GB', 'nds', 'te', 'yi'): |
1133 | + if locale in ('en_GB', 'en_CA', 'en_AU', 'si', 'ur', 'sc', 'ltg', 'nds', 'te', 'yi'): |
1134 | continue |
1135 | pycountry = self.j(sysconfig.get_python_lib(), 'pycountry', |
1136 | 'locales', locale, 'LC_MESSAGES') |
1137 | @@ -140,17 +156,6 @@ |
1138 | self.warn('No ISO 639 translations for locale:', locale, |
1139 | '\nDo you have pycountry installed?') |
1140 | |
1141 | - base = os.path.join(pyqt.qt_data_dir, 'translations') |
1142 | - qt_translations = glob.glob(os.path.join(base, 'qt_*.qm')) |
1143 | - if not qt_translations: |
1144 | - raise Exception('Could not find qt translations') |
1145 | - for f in qt_translations: |
1146 | - locale = self.s(self.b(f))[0][3:] |
1147 | - dest = self.j(self.DEST, locale, 'LC_MESSAGES', 'qt.qm') |
1148 | - if self.e(self.d(dest)) and self.newer(dest, f): |
1149 | - self.info('\tCopying Qt translation for locale:', locale) |
1150 | - shutil.copy2(f, dest) |
1151 | - |
1152 | self.write_stats() |
1153 | self.freeze_locales() |
1154 | |
1155 | @@ -201,7 +206,7 @@ |
1156 | for x in (i, j, d): |
1157 | if os.path.exists(x): |
1158 | os.remove(x) |
1159 | - |
1160 | +# }}} |
1161 | |
1162 | class GetTranslations(Translations): |
1163 | |
1164 | |
1165 | === modified file 'src/calibre/__init__.py' |
1166 | --- src/calibre/__init__.py 2011-06-27 21:34:52 +0000 |
1167 | +++ src/calibre/__init__.py 2011-07-19 06:18:03 +0000 |
1168 | @@ -341,7 +341,7 @@ |
1169 | def browser(honor_time=True, max_time=2, mobile_browser=False, user_agent=None): |
1170 | ''' |
1171 | Create a mechanize browser for web scraping. The browser handles cookies, |
1172 | - refresh requests and ignores robots.txt. Also uses proxy if avaialable. |
1173 | + refresh requests and ignores robots.txt. Also uses proxy if available. |
1174 | |
1175 | :param honor_time: If True honors pause time in refresh requests |
1176 | :param max_time: Maximum time in seconds to wait during a refresh request |
1177 | @@ -474,7 +474,7 @@ |
1178 | def my_unichr(num): |
1179 | try: |
1180 | return unichr(num) |
1181 | - except ValueError: |
1182 | + except (ValueError, OverflowError): |
1183 | return u'?' |
1184 | |
1185 | def entity_to_unicode(match, exceptions=[], encoding='cp1252', |
1186 | |
1187 | === modified file 'src/calibre/constants.py' |
1188 | --- src/calibre/constants.py 2011-07-08 17:01:37 +0000 |
1189 | +++ src/calibre/constants.py 2011-07-19 06:18:03 +0000 |
1190 | @@ -4,7 +4,7 @@ |
1191 | __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net' |
1192 | __docformat__ = 'restructuredtext en' |
1193 | __appname__ = u'calibre' |
1194 | -numeric_version = (0, 8, 9) |
1195 | +numeric_version = (0, 8, 10) |
1196 | __version__ = u'.'.join(map(unicode, numeric_version)) |
1197 | __author__ = u"Kovid Goyal <kovid@kovidgoyal.net>" |
1198 | |
1199 | |
1200 | === modified file 'src/calibre/customize/builtins.py' |
1201 | --- src/calibre/customize/builtins.py 2011-07-11 11:31:21 +0000 |
1202 | +++ src/calibre/customize/builtins.py 2011-07-19 06:18:03 +0000 |
1203 | @@ -1181,6 +1181,16 @@ |
1204 | headquarters = 'US' |
1205 | formats = ['EPUB', 'MOBI', 'PDF'] |
1206 | |
1207 | +class StoreChitankaStore(StoreBase): |
1208 | + name = u'Моята библиотека' |
1209 | + author = 'Alex Stanev' |
1210 | + description = u'Независим сайт за DRM свободна литература на български език' |
1211 | + actual_plugin = 'calibre.gui2.store.stores.chitanka_plugin:ChitankaStore' |
1212 | + |
1213 | + drm_free_only = True |
1214 | + headquarters = 'BG' |
1215 | + formats = ['FB2', 'EPUB', 'TXT', 'SFB'] |
1216 | + |
1217 | class StoreDieselEbooksStore(StoreBase): |
1218 | name = 'Diesel eBooks' |
1219 | description = u'Instant access to over 2.4 million titles from hundreds of publishers including Harlequin, HarperCollins, John Wiley & Sons, McGraw-Hill, Simon & Schuster and Random House.' |
1220 | @@ -1455,6 +1465,7 @@ |
1221 | StoreBNStore, |
1222 | StoreBeamEBooksDEStore, |
1223 | StoreBeWriteStore, |
1224 | + StoreChitankaStore, |
1225 | StoreDieselEbooksStore, |
1226 | StoreEbookNLStore, |
1227 | StoreEbookscomStore, |
1228 | |
1229 | === modified file 'src/calibre/db/backend.py' |
1230 | --- src/calibre/db/backend.py 2011-07-04 01:03:52 +0000 |
1231 | +++ src/calibre/db/backend.py 2011-07-19 06:18:03 +0000 |
1232 | @@ -8,7 +8,7 @@ |
1233 | __docformat__ = 'restructuredtext en' |
1234 | |
1235 | # Imports {{{ |
1236 | -import os, shutil, uuid, json |
1237 | +import os, shutil, uuid, json, glob, time, tempfile |
1238 | from functools import partial |
1239 | |
1240 | import apsw |
1241 | @@ -25,7 +25,7 @@ |
1242 | from calibre.utils.date import utcfromtimestamp, parse_date |
1243 | from calibre.utils.filenames import is_case_sensitive |
1244 | from calibre.db.tables import (OneToOneTable, ManyToOneTable, ManyToManyTable, |
1245 | - SizeTable, FormatsTable, AuthorsTable, IdentifiersTable) |
1246 | + SizeTable, FormatsTable, AuthorsTable, IdentifiersTable, CompositeTable) |
1247 | # }}} |
1248 | |
1249 | ''' |
1250 | @@ -37,6 +37,8 @@ |
1251 | |
1252 | ''' |
1253 | |
1254 | +SPOOL_SIZE = 30*1024*1024 |
1255 | + |
1256 | class DynamicFilter(object): # {{{ |
1257 | |
1258 | 'No longer used, present for legacy compatibility' |
1259 | @@ -478,7 +480,6 @@ |
1260 | remove.append(data) |
1261 | continue |
1262 | |
1263 | - self.custom_column_label_map[data['label']] = data['num'] |
1264 | self.custom_column_num_map[data['num']] = \ |
1265 | self.custom_column_label_map[data['label']] = data |
1266 | |
1267 | @@ -613,10 +614,31 @@ |
1268 | |
1269 | tables['size'] = SizeTable('size', self.field_metadata['size'].copy()) |
1270 | |
1271 | - for label, data in self.custom_column_label_map.iteritems(): |
1272 | - label = '#' + label |
1273 | + self.FIELD_MAP = {'id':0, 'title':1, 'authors':2, 'timestamp':3, |
1274 | + 'size':4, 'rating':5, 'tags':6, 'comments':7, 'series':8, |
1275 | + 'publisher':9, 'series_index':10, 'sort':11, 'author_sort':12, |
1276 | + 'formats':13, 'path':14, 'pubdate':15, 'uuid':16, 'cover':17, |
1277 | + 'au_map':18, 'last_modified':19, 'identifiers':20} |
1278 | + |
1279 | + for k,v in self.FIELD_MAP.iteritems(): |
1280 | + self.field_metadata.set_field_record_index(k, v, prefer_custom=False) |
1281 | + |
1282 | + base = max(self.FIELD_MAP.itervalues()) |
1283 | + |
1284 | + for label_, data in self.custom_column_label_map.iteritems(): |
1285 | + label = self.field_metadata.custom_field_prefix + label_ |
1286 | metadata = self.field_metadata[label].copy() |
1287 | link_table = self.custom_table_names(data['num'])[1] |
1288 | + self.FIELD_MAP[data['num']] = base = base+1 |
1289 | + self.field_metadata.set_field_record_index(label_, base, |
1290 | + prefer_custom=True) |
1291 | + if data['datatype'] == 'series': |
1292 | + # account for the series index column. Field_metadata knows that |
1293 | + # the series index is one larger than the series. If you change |
1294 | + # it here, be sure to change it there as well. |
1295 | + self.FIELD_MAP[str(data['num'])+'_index'] = base = base+1 |
1296 | + self.field_metadata.set_field_record_index(label_+'_index', base, |
1297 | + prefer_custom=True) |
1298 | |
1299 | if data['normalized']: |
1300 | if metadata['is_multiple']: |
1301 | @@ -633,7 +655,16 @@ |
1302 | metadata['table'] = link_table |
1303 | tables[label] = OneToOneTable(label, metadata) |
1304 | else: |
1305 | - tables[label] = OneToOneTable(label, metadata) |
1306 | + if data['datatype'] == 'composite': |
1307 | + tables[label] = CompositeTable(label, metadata) |
1308 | + else: |
1309 | + tables[label] = OneToOneTable(label, metadata) |
1310 | + |
1311 | + self.FIELD_MAP['ondevice'] = base = base+1 |
1312 | + self.field_metadata.set_field_record_index('ondevice', base, prefer_custom=False) |
1313 | + self.FIELD_MAP['marked'] = base = base+1 |
1314 | + self.field_metadata.set_field_record_index('marked', base, prefer_custom=False) |
1315 | + |
1316 | # }}} |
1317 | |
1318 | @property |
1319 | @@ -732,5 +763,57 @@ |
1320 | pprint.pprint(table.metadata) |
1321 | raise |
1322 | |
def format_abspath(self, book_id, fmt, fname, path):
    '''
    Return the absolute path to the file holding format ``fmt`` of a book,
    or None if no such file can be located.

    :param book_id: Id of the book (not used by this implementation)
    :param fmt: Format name, e.g. ``'EPUB'``. May be empty, in which case the
        file is looked up without an extension
    :param fname: File name of the format, without extension
    :param path: Directory of the book, relative to the library root

    If the expected file is missing but another file with the same extension
    exists in the book directory, that file is copied to the expected
    location and the expected location is returned.
    '''
    book_dir = os.path.join(self.library_path, path)
    ext = ('.' + fmt.lower()) if fmt else ''
    fmt_path = os.path.join(book_dir, fname + ext)
    if os.path.exists(fmt_path):
        return fmt_path
    if not ext:
        # Without an extension there is nothing to match candidates against
        return None
    try:
        candidates = glob.glob(os.path.join(book_dir, '*' + ext))
    except Exception:
        # If path contains strange characters glob can throw an exception.
        # Only ordinary errors are swallowed, never KeyboardInterrupt or
        # SystemExit.
        candidates = []
    if candidates and os.path.exists(candidates[0]):
        shutil.copyfile(candidates[0], fmt_path)
        return fmt_path
    return None
1336 | + |
def format_metadata(self, book_id, fmt, fname, path):
    '''
    Return a dictionary describing the file for format ``fmt`` of a book.

    The dictionary has the keys ``size`` (from ``st_size``) and ``mtime``
    (``st_mtime`` passed through ``utcfromtimestamp``). It is empty when
    ``format_abspath`` cannot locate the file.
    '''
    abspath = self.format_abspath(book_id, fmt, fname, path)
    if abspath is None:
        return {}
    st = os.stat(abspath)
    return {'size': st.st_size, 'mtime': utcfromtimestamp(st.st_mtime)}
1345 | + |
def cover(self, path, as_file=False, as_image=False,
        as_path=False):
    '''
    Return the contents of ``cover.jpg`` from the book directory ``path``
    (relative to the library root), or None if it is not readable.

    :param as_path: Copy the cover to a PersistentTemporaryFile and return
        the name of that file. Takes precedence over the other flags.
    :param as_file: Return a SpooledTemporaryFile positioned at the start
    :param as_image: Return a QImage loaded from the cover bytes
        (only consulted when ``as_file`` is False)

    With no flags set the raw bytes are returned.
    '''
    cover_path = os.path.join(self.library_path, path, 'cover.jpg')
    if not os.access(cover_path, os.R_OK):
        return None
    try:
        src = lopen(cover_path, 'rb')
    except (IOError, OSError):
        # One retry after a short pause before giving up
        time.sleep(0.2)
        src = lopen(cover_path, 'rb')
    with src:
        if as_path:
            tmp = PersistentTemporaryFile('_dbcover.jpg')
            with tmp:
                shutil.copyfileobj(src, tmp)
            return tmp.name
        if as_file:
            result = tempfile.SpooledTemporaryFile(SPOOL_SIZE)
            shutil.copyfileobj(src, result)
            result.seek(0)
        else:
            result = src.read()
            if as_image:
                from PyQt4.Qt import QImage
                img = QImage()
                img.loadFromData(result)
                result = img
    return result
1374 | + |
1375 | # }}} |
1376 | |
1377 | |
1378 | === modified file 'src/calibre/db/cache.py' |
1379 | --- src/calibre/db/cache.py 2011-07-05 04:59:54 +0000 |
1380 | +++ src/calibre/db/cache.py 2011-07-19 06:18:03 +0000 |
1381 | @@ -7,5 +7,380 @@ |
1382 | __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>' |
1383 | __docformat__ = 'restructuredtext en' |
1384 | |
1385 | - |
1386 | - |
1387 | +import os |
1388 | +from collections import defaultdict |
1389 | +from functools import wraps, partial |
1390 | + |
1391 | +from calibre.db.locking import create_locks, RecordLock |
1392 | +from calibre.db.fields import create_field |
1393 | +from calibre.ebooks.book.base import Metadata |
1394 | +from calibre.utils.date import now |
1395 | + |
def api(f):
    ''' Decorator: tag ``f`` as a cache API function and return it unchanged. '''
    setattr(f, 'is_cache_api', True)
    return f
1399 | + |
def read_api(f):
    '''
    Decorator: tag ``f`` as a cache API function (via ``api``) that only
    reads, by setting ``is_read_api`` to True on it.
    '''
    tagged = api(f)
    setattr(tagged, 'is_read_api', True)
    return tagged
1404 | + |
def write_api(f):
    '''
    Decorator: tag ``f`` as a cache API function (via ``api``) that writes,
    by setting ``is_read_api`` to False on it.
    '''
    tagged = api(f)
    setattr(tagged, 'is_read_api', False)
    return tagged
1409 | + |
def wrap_simple(lock, func):
    '''
    Return a wrapper around ``func`` that holds ``lock`` (used as a context
    manager) for the duration of every call. The wrapper keeps the name and
    docstring of ``func``.
    '''
    def locked_call(*args, **kwargs):
        with lock:
            result = func(*args, **kwargs)
        return result
    return wraps(func)(locked_call)
1416 | + |
1417 | + |
class Cache(object):

    '''
    Cache of library metadata built from the tables of a backend.

    Methods decorated with ``read_api``/``write_api`` are wrapped at
    construction time so that they run while holding the read or write lock
    respectively. The unwrapped original of each such method is available
    under the same name with a leading underscore, for use by code that
    already holds the lock.
    '''

    def __init__(self, backend):
        self.backend = backend
        self.fields = {}
        self.composites = set()
        self.read_lock, self.write_lock = create_locks()
        self.record_lock = RecordLock(self.read_lock)
        self.format_metadata_cache = defaultdict(dict)

        # Implement locking for all simple read/write API methods
        # An unlocked version of the method is stored with the name starting
        # with a leading underscore. Use the unlocked versions when the lock
        # has already been acquired.
        for name in dir(self):
            func = getattr(self, name)
            ira = getattr(func, 'is_read_api', None)
            if ira is not None:
                # Save original function
                setattr(self, '_'+name, func)
                # Wrap it in a lock
                lock = self.read_lock if ira else self.write_lock
                setattr(self, name, wrap_simple(lock, func))

    @property
    def field_metadata(self):
        ''' The field metadata object of the backend. '''
        return self.backend.field_metadata

    def _format_abspath(self, book_id, fmt):
        '''
        Return absolute path to the ebook file of format ``fmt``, or None if
        the file name or book path cannot be determined.

        WARNING: This method will return a dummy path for a network backend DB,
        so do not rely on it, use format(..., as_path=True) instead.

        Currently used only in calibredb list, the viewer and the catalogs (via
        get_data_as_dict()).

        Apart from the viewer, I don't believe any of the others do any file
        I/O with the results of this call.
        '''
        try:
            name = self.fields['formats'].format_fname(book_id, fmt)
            path = self._field_for('path', book_id).replace('/', os.sep)
        except Exception:
            # Unknown book/format, or no path recorded for the book
            return None
        if name and path:
            return self.backend.format_abspath(book_id, fmt, name, path)

    def _get_metadata(self, book_id, get_user_categories=True): # {{{
        '''
        Build a Metadata object for ``book_id`` from the cached fields.

        Must be called with the read lock held, only the unlocked
        (underscore prefixed) accessors are used here.

        :param get_user_categories: If True, ``mi.user_categories`` is
            filled in from the ``user_categories`` preference, otherwise it is
            left as an empty dictionary.
        '''
        mi = Metadata(None)
        author_ids = self._field_ids_for('authors', book_id)
        aut_list = [self._author_data(i) for i in author_ids]
        aum = []
        aus = {}
        aul = {}
        for rec in aut_list:
            aut = rec['name']
            aum.append(aut)
            aus[aut] = rec['sort']
            aul[aut] = rec['link']
        mi.title = self._field_for('title', book_id,
                default_value=_('Unknown'))
        mi.authors = aum
        mi.author_sort = self._field_for('author_sort', book_id,
                default_value=_('Unknown'))
        mi.author_sort_map = aus
        mi.author_link_map = aul
        mi.comments = self._field_for('comments', book_id)
        mi.publisher = self._field_for('publisher', book_id)
        n = now()
        mi.timestamp = self._field_for('timestamp', book_id, default_value=n)
        mi.pubdate = self._field_for('pubdate', book_id, default_value=n)
        mi.uuid = self._field_for('uuid', book_id,
                default_value='dummy')
        mi.title_sort = self._field_for('sort', book_id,
                default_value=_('Unknown'))
        mi.book_size = self._field_for('size', book_id, default_value=0)
        mi.ondevice_col = self._field_for('ondevice', book_id, default_value='')
        mi.last_modified = self._field_for('last_modified', book_id,
                default_value=n)
        formats = self._field_for('formats', book_id)
        mi.format_metadata = {}
        if not formats:
            formats = None
        else:
            for f in formats:
                mi.format_metadata[f] = self._format_metadata(book_id, f)
            formats = ','.join(formats)
        mi.formats = formats
        mi.has_cover = _('Yes') if self._field_for('cover', book_id,
                default_value=False) else ''
        mi.tags = list(self._field_for('tags', book_id, default_value=()))
        mi.series = self._field_for('series', book_id)
        if mi.series:
            mi.series_index = self._field_for('series_index', book_id,
                    default_value=1.0)
        mi.rating = self._field_for('rating', book_id)
        mi.set_identifiers(self._field_for('identifiers', book_id,
            default_value={}))
        mi.application_id = book_id
        mi.id = book_id
        # Composite fields are rendered last, since their values are derived
        # from the other fields already set on mi. This has to be a list:
        # keys are appended to it below.
        composites = []
        for key, meta in self.field_metadata.custom_iteritems():
            mi.set_user_metadata(key, meta)
            if meta['datatype'] == 'composite':
                composites.append(key)
            else:
                # NOTE(review): self.fields is keyed by the backend table
                # names; confirm that meta['label'] (rather than key) is the
                # right lookup name here.
                mi.set(key, val=self._field_for(meta['label'], book_id),
                        extra=self._field_for(meta['label']+'_index', book_id))
        for c in composites:
            # Use the loop variable, not the stale key left over from the
            # loop above
            mi.set(c, val=self._composite_for(c, book_id, mi))

        user_cat_vals = {}
        if get_user_categories:
            # NOTE(review): Cache itself defines no prefs attribute; this
            # assumes the preferences are exposed by the backend -- confirm.
            user_cats = self.backend.prefs['user_categories']
            for ucat in user_cats:
                res = []
                for name,cat,ign in user_cats[ucat]:
                    v = mi.get(cat, None)
                    if isinstance(v, list):
                        if name in v:
                            res.append([name,cat])
                    elif name == v:
                        res.append([name,cat])
                user_cat_vals[ucat] = res
        mi.user_categories = user_cat_vals

        return mi
    # }}}

    # Cache Layer API {{{

    @api
    def init(self):
        '''
        Initialize this cache with data from the backend.
        '''
        with self.write_lock:
            self.backend.read_tables()

            for field, table in self.backend.tables.iteritems():
                self.fields[field] = create_field(field, table)
                if table.metadata['datatype'] == 'composite':
                    self.composites.add(field)

            self.fields['ondevice'] = create_field('ondevice', None)

    @read_api
    def field_for(self, name, book_id, default_value=None):
        '''
        Return the value of the field ``name`` for the book identified by
        ``book_id``. If no such book exists or it has no defined value for the
        field ``name`` or no such field exists, then ``default_value`` is returned.

        The returned values for is_multiple fields are always tuples.
        '''
        if self.composites and name in self.composites:
            # The read lock is already held here, so call the unlocked
            # version instead of acquiring the lock a second time
            return self._composite_for(name, book_id,
                    default_value=default_value)
        try:
            return self.fields[name].for_book(book_id, default_value=default_value)
        except (KeyError, IndexError):
            return default_value

    @read_api
    def composite_for(self, name, book_id, mi=None, default_value=''):
        '''
        Return the value of the composite field ``name`` for ``book_id``, or
        ``default_value`` if there is no field with that name.

        :param mi: If supplied, the value is rendered against this Metadata
            object, otherwise the metadata for the book is built on demand
            (without user categories).
        '''
        try:
            f = self.fields[name]
        except KeyError:
            return default_value

        if mi is None:
            return f.get_value_with_cache(book_id, partial(self._get_metadata,
                get_user_categories=False))
        else:
            return f.render_composite(book_id, mi)

    @read_api
    def field_ids_for(self, name, book_id):
        '''
        Return the ids (as a tuple) for the values that the field ``name`` has on the book
        identified by ``book_id``. If there are no values, or no such book, or
        no such field, an empty tuple is returned.
        '''
        try:
            return self.fields[name].ids_for_book(book_id)
        except (KeyError, IndexError):
            return ()

    @read_api
    def books_for_field(self, name, item_id):
        '''
        Return all the books associated with the item identified by
        ``item_id``, where the item belongs to the field ``name``.

        Returned value is a tuple of book ids, or the empty tuple if the item
        or the field does not exist.
        '''
        try:
            return self.fields[name].books_for(item_id)
        except (KeyError, IndexError):
            return ()

    @read_api
    def all_book_ids(self):
        '''
        Frozen set of all known book ids.
        '''
        return frozenset(self.fields['uuid'].iter_book_ids())

    @read_api
    def all_field_ids(self, name):
        '''
        Frozen set of ids for all values in the field ``name``.
        '''
        return frozenset(iter(self.fields[name]))

    @read_api
    def author_data(self, author_id):
        '''
        Return author data as a dictionary with keys: name, sort, link

        If no author with the specified id is found an empty dictionary is
        returned.
        '''
        try:
            return self.fields['authors'].author_data(author_id)
        except (KeyError, IndexError):
            return {}

    @read_api
    def format_metadata(self, book_id, fmt, allow_cache=True):
        '''
        Return the dictionary of file metadata the backend reports for the
        format ``fmt`` of ``book_id``. An empty dictionary is returned if
        ``fmt`` is empty or the file name/book path cannot be determined.

        :param allow_cache: If True a previously computed result is reused.
            The freshly computed result is stored in the cache either way.
        '''
        if not fmt:
            return {}
        fmt = fmt.upper()
        if allow_cache:
            x = self.format_metadata_cache[book_id].get(fmt, None)
            if x is not None:
                return x
        try:
            name = self.fields['formats'].format_fname(book_id, fmt)
            path = self._field_for('path', book_id).replace('/', os.sep)
        except Exception:
            # Unknown book/format, or no path recorded for the book
            return {}

        ans = {}
        if path and name:
            ans = self.backend.format_metadata(book_id, fmt, name, path)
            self.format_metadata_cache[book_id][fmt] = ans
        return ans

    @api
    def get_metadata(self, book_id,
            get_cover=False, get_user_categories=True, cover_as_data=False):
        '''
        Return metadata for the book identified by book_id as a :class:`Metadata` object.
        Note that the list of formats is not verified. If get_cover is True,
        the cover is returned, either a path to temp file as mi.cover or if
        cover_as_data is True then as mi.cover_data.
        '''

        with self.read_lock:
            mi = self._get_metadata(book_id, get_user_categories=get_user_categories)

        # The cover is fetched after the read lock has been released,
        # self.cover() does its own locking
        if get_cover:
            if cover_as_data:
                cdata = self.cover(book_id)
                if cdata:
                    mi.cover_data = ('jpeg', cdata)
            else:
                mi.cover = self.cover(book_id, as_path=True)

        return mi

    @api
    def cover(self, book_id,
            as_file=False, as_image=False, as_path=False):
        '''
        Return the cover image or None. By default, returns the cover as a
        bytestring.

        WARNING: Using as_path will copy the cover to a temp file and return
        the path to the temp file. You should delete the temp file when you are
        done with it.

        :param as_file: If True return the image as an open file object (a SpooledTemporaryFile)
        :param as_image: If True return the image as a QImage object
        :param as_path: If True return the image as a path pointing to a
                        temporary file
        '''
        with self.read_lock:
            try:
                path = self._field_for('path', book_id).replace('/', os.sep)
            except Exception:
                # No path recorded for this book
                return None

        # The record lock is taken only after the read lock has been released
        with self.record_lock.lock(book_id):
            return self.backend.cover(path, as_file=as_file, as_image=as_image,
                    as_path=as_path)

    @read_api
    def multisort(self, fields):
        '''
        Return a list of all book ids, sorted by ``fields``.

        :param fields: A sequence of ``(field_name, ascending)`` pairs. Books
            are ordered by the first field, ties are broken by the following
            fields in turn.
        '''
        all_book_ids = frozenset(self._all_book_ids())
        get_metadata = partial(self._get_metadata, get_user_categories=False)

        sort_keys = tuple(self.fields[field[0]].sort_keys_for_books(get_metadata,
            all_book_ids) for field in fields)

        if len(sort_keys) == 1:
            sk = sort_keys[0]
            # The ascending flag is the second item of the only field spec
            return sorted(all_book_ids, key=lambda i:sk[i], reverse=not
                    fields[0][1])
        else:
            return sorted(all_book_ids, key=partial(SortKey, fields, sort_keys))

    # }}}
1735 | + |
class SortKey(object):

    '''
    Key object for sorting book ids by several fields at once.

    :param fields: Sequence of ``(field_name, ascending)`` pairs
    :param sort_keys: One ``book_id -> key`` mapping per entry in ``fields``
    :param book_id: The book this key stands for

    Instances compare field by field, the first field whose keys differ
    decides, with the result flipped for descending fields.
    '''

    def __init__(self, fields, sort_keys, book_id):
        self.orders = tuple(1 if f[1] else -1 for f in fields)
        self.sort_key = tuple(sk[book_id] for sk in sort_keys)

    def _compare(self, other):
        # Three way comparison that does not depend on the cmp() builtin
        for i, order in enumerate(self.orders):
            a, b = self.sort_key[i], other.sort_key[i]
            ans = (a > b) - (a < b)
            if ans != 0:
                return ans * order
        return 0

    def __cmp__(self, other):
        return self._compare(other)

    def __lt__(self, other):
        # Rich comparison used by sorted(), needed where __cmp__ is not
        # consulted
        return self._compare(other) < 0
1748 | + |
1749 | + |
1750 | +# Testing {{{ |
1751 | + |
def test(library_path):
    ''' Smoke test: load the library at ``library_path`` and print its book ids. '''
    from calibre.db.backend import DB
    cache = Cache(DB(library_path))
    cache.init()
    print ('All book ids:', cache.all_book_ids())

if __name__ == '__main__':
    # Run the smoke test against the library configured in the preferences
    from calibre.utils.config import prefs
    test(prefs['library_path'])
1762 | + |
1763 | +# }}} |
1764 | |
1765 | === added file 'src/calibre/db/fields.py' |
1766 | --- src/calibre/db/fields.py 1970-01-01 00:00:00 +0000 |
1767 | +++ src/calibre/db/fields.py 2011-07-19 06:18:03 +0000 |
1768 | @@ -0,0 +1,257 @@ |
1769 | +#!/usr/bin/env python |
1770 | +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai |
1771 | +from __future__ import (unicode_literals, division, absolute_import, |
1772 | + print_function) |
1773 | +from future_builtins import map |
1774 | + |
1775 | +__license__ = 'GPL v3' |
1776 | +__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>' |
1777 | +__docformat__ = 'restructuredtext en' |
1778 | + |
1779 | +from threading import Lock |
1780 | + |
1781 | +from calibre.db.tables import ONE_ONE, MANY_ONE, MANY_MANY |
1782 | +from calibre.utils.icu import sort_key |
1783 | + |
class Field(object):

    '''
    Base class for the view of a single table as a field of a book. The
    lookup methods are abstract here and raise NotImplementedError.
    '''

    def __init__(self, name, table):
        self.name = name
        self.table = table
        datatype = self.metadata['datatype']
        self.has_text_data = datatype in ('text', 'comments', 'series',
                'enumeration')
        self.table_type = self.table.table_type
        # Only plain text values go through sort_key, everything else is
        # used as is
        if datatype == 'text':
            self._sort_key = sort_key
        else:
            self._sort_key = lambda x: x

    @property
    def metadata(self):
        ''' The metadata of the underlying table. '''
        return self.table.metadata

    def for_book(self, book_id, default_value=None):
        '''
        Return the value this field has for the book ``book_id``, or
        ``default_value`` when there is no value.
        '''
        raise NotImplementedError()

    def ids_for_book(self, book_id):
        '''
        Return a tuple of the ids of the items associated with the book
        ``book_id``. The tuple is empty if there are no such items.
        '''
        raise NotImplementedError()

    def books_for(self, item_id):
        '''
        Return a tuple of the ids of all books associated with the item
        ``item_id``. The tuple is empty if there are no such books.
        '''
        raise NotImplementedError()

    def __iter__(self):
        '''
        Iterate over the ids of all values in this field
        '''
        raise NotImplementedError()

    def sort_keys_for_books(self, get_metadata, all_book_ids):
        '''
        Return a mapping of book_id -> sort_key, where the sort keys are
        suitable for ordering the list of all books by this field with the
        python cmp method.
        '''
        raise NotImplementedError()
1833 | + |
1834 | + |
class OneToOneField(Field):

    '''
    Field in which every book has (at most) one value of its own. The id of
    an item is the id of the book it belongs to.
    '''

    def for_book(self, book_id, default_value=None):
        return self.table.book_col_map.get(book_id, default_value)

    def ids_for_book(self, book_id):
        # Items are identified by the id of their book
        return (book_id,)

    def books_for(self, item_id):
        return (item_id,)

    def __iter__(self):
        return self.table.book_col_map.iterkeys()

    def iter_book_ids(self):
        ''' Iterate over the ids of the books that have a value in this field '''
        return self.table.book_col_map.iterkeys()

    def sort_keys_for_books(self, get_metadata, all_book_ids):
        # The value map lives on the table, not on the field itself. Books
        # without a value sort as the empty string.
        book_col_map = self.table.book_col_map
        return {id_ : self._sort_key(book_col_map.get(id_, '')) for id_ in
                all_book_ids}
1855 | + |
class CompositeField(OneToOneField):

    '''
    Field whose value is derived from the metadata of a book. Rendered
    values are remembered per book in a cache protected by its own lock.
    '''

    def __init__(self, *args, **kwargs):
        OneToOneField.__init__(self, *args, **kwargs)

        self._render_cache = {}
        self._lock = Lock()

    def render_composite(self, book_id, mi):
        '''
        Return the value of this field for ``book_id``, taken from the cache
        if present, otherwise read from the Metadata object ``mi`` and cached.
        '''
        with self._lock:
            ans = self._render_cache.get(book_id, None)
        if ans is None:
            # The lock is not held while reading from mi
            ans = mi.get(self.metadata['label'])
            with self._lock:
                self._render_cache[book_id] = ans
        return ans

    def clear_cache(self):
        ''' Forget all cached values '''
        with self._lock:
            self._render_cache = {}

    def pop_cache(self, book_id):
        ''' Forget the cached value for ``book_id``, if any '''
        with self._lock:
            self._render_cache.pop(book_id, None)

    def get_value_with_cache(self, book_id, get_metadata):
        '''
        Return the value of this field for ``book_id``, taken from the cache
        if present. Otherwise ``get_metadata(book_id)`` is called to obtain
        the metadata to read the value from, and the result is cached.
        '''
        with self._lock:
            ans = self._render_cache.get(book_id, None)
        if ans is None:
            mi = get_metadata(book_id)
            ans = mi.get(self.metadata['label'])
            # Remember the value, so that it is not recomputed on every call
            with self._lock:
                self._render_cache[book_id] = ans
        return ans

    def sort_keys_for_books(self, get_metadata, all_book_ids):
        return {id_ : sort_key(self.get_value_with_cache(id_, get_metadata)) for id_ in
                all_book_ids}
1892 | + |
1893 | + |
class OnDeviceField(OneToOneField):

    '''
    Field describing where on the connected device a book is present. It has
    no table, the information comes from a callable installed with
    :meth:`set_book_on_device_func`.
    '''

    def __init__(self, name, table):
        # NOTE(review): the base class __init__ is not called, so the
        # attributes it sets (table, table_type, ...) do not exist on
        # instances of this class.
        self.name = name
        self.book_on_device_func = None

    def book_on_device(self, book_id):
        ''' Result of the installed callable for ``book_id``, None if there is none '''
        func = self.book_on_device_func
        if not callable(func):
            return None
        return func(book_id)

    def set_book_on_device_func(self, func):
        self.book_on_device_func = func

    def for_book(self, book_id, default_value=None):
        '''
        Return a comma separated list of the locations of the book, with the
        number of books appended when it is larger than one. ``default_value``
        is not used.
        '''
        locations = []
        count = 0
        on = self.book_on_device(book_id)
        if on is not None:
            main, card_a, card_b, count = on[:4]
            if main is not None:
                locations.append(_('Main'))
            if card_a is not None:
                locations.append(_('Card A'))
            if card_b is not None:
                locations.append(_('Card B'))
        suffix = (' (%s books)'%count) if count > 1 else ''
        return ', '.join(locations) + suffix

    def __iter__(self):
        return iter(())

    def iter_book_ids(self):
        return iter(())

    def sort_keys_for_books(self, get_metadata, all_book_ids):
        # Books are sorted by the text shown for them
        return {id_ : self.for_book(id_) for id_ in
                all_book_ids}
1931 | + |
class ManyToOneField(Field):

    '''
    Field in which every book has at most one item, while an item can be
    shared by many books.
    '''

    def for_book(self, book_id, default_value=None):
        item_id = self.table.book_col_map.get(book_id, None)
        if item_id is not None:
            # The item values are stored on the table, not on the field
            ans = self.table.id_map[item_id]
        else:
            ans = default_value
        return ans

    def ids_for_book(self, book_id):
        id_ = self.table.book_col_map.get(book_id, None)
        if id_ is None:
            return ()
        return (id_,)

    def books_for(self, item_id):
        return self.table.col_book_map.get(item_id, ())

    def __iter__(self):
        return self.table.id_map.iterkeys()

    def sort_keys_for_books(self, get_metadata, all_book_ids):
        # First compute one sort key per item (keyed by item id), then give
        # every book the key of its item. Books without an item get the key
        # of the empty string.
        item_keys = {item_id : self._sort_key(val) for item_id, val in
                self.table.id_map.iteritems()}
        book_col_map = self.table.book_col_map
        missing = self._sort_key('')
        return {id_ : item_keys.get(book_col_map.get(id_, None), missing)
                for id_ in all_book_ids}
1959 | + |
class ManyToManyField(Field):

    '''
    Field in which a book can have several items and an item can belong to
    several books.
    '''

    def __init__(self, *args, **kwargs):
        Field.__init__(self, *args, **kwargs)
        # The items of the authors field keep their stored order when
        # sorting, the items of all other fields are sorted first
        self.alphabetical_sort = self.name != 'authors'

    def for_book(self, book_id, default_value=None):
        ids = self.table.book_col_map.get(book_id, ())
        if ids:
            # The item values are stored on the table, not on the field
            id_map = self.table.id_map
            ans = tuple(id_map[i] for i in ids)
        else:
            ans = default_value
        return ans

    def ids_for_book(self, book_id):
        return self.table.book_col_map.get(book_id, ())

    def books_for(self, item_id):
        return self.table.col_book_map.get(item_id, ())

    def __iter__(self):
        return self.table.id_map.iterkeys()

    def sort_keys_for_books(self, get_metadata, all_book_ids):
        # One sort key per item, keyed by item id
        item_keys = {item_id : self._sort_key(val) for item_id, val in
                self.table.id_map.iteritems()}
        book_col_map = self.table.book_col_map

        def sort_key_for_book(book_id):
            item_ids = book_col_map.get(book_id, ())
            if self.alphabetical_sort:
                item_ids = sorted(item_ids, key=item_keys.get)
            return tuple(map(item_keys.get, item_ids))

        return {id_ : sort_key_for_book(id_) for id_ in all_book_ids}
1994 | + |
1995 | + |
class AuthorsField(ManyToManyField):

    def author_data(self, author_id):
        '''
        Return a dictionary with the keys name, sort and link for the author
        ``author_id``. Raises KeyError/IndexError style lookup errors if the
        author is not present in the table maps.
        '''
        table = self.table
        name = table.id_map[author_id]
        sort = table.asort_map[author_id]
        link = table.alink_map[author_id]
        return {'name' : name, 'sort' : sort, 'link' : link}
2004 | + |
class FormatsField(ManyToManyField):

    def format_fname(self, book_id, fmt):
        '''
        Return the file name recorded for the format ``fmt`` (looked up in
        upper case) of the book ``book_id``.
        '''
        names_by_format = self.table.fname_map[book_id]
        return names_by_format[fmt.upper()]
2009 | + |
def create_field(name, table):
    '''
    Create the Field object appropriate for the field ``name`` backed by
    ``table``.

    The fields ``authors``, ``ondevice`` and ``formats`` as well as tables of
    datatype ``composite`` have dedicated classes, all other fields get the
    class matching the type of their table.

    :param table: The table of the field. It is not inspected for the
        ``ondevice`` field, which can therefore be created with ``None``.
    '''
    if name == 'ondevice':
        # Decided by name alone, there is no table to look at
        return OnDeviceField(name, table)
    if name == 'authors':
        cls = AuthorsField
    elif name == 'formats':
        cls = FormatsField
    elif table.metadata['datatype'] == 'composite':
        cls = CompositeField
    else:
        cls = {
                ONE_ONE : OneToOneField,
                MANY_ONE : ManyToOneField,
                MANY_MANY : ManyToManyField,
            }[table.table_type]
    return cls(name, table)
2025 | + |
2026 | |
2027 | === modified file 'src/calibre/db/locking.py' |
2028 | --- src/calibre/db/locking.py 2011-07-10 21:12:06 +0000 |
2029 | +++ src/calibre/db/locking.py 2011-07-19 06:18:03 +0000 |
2030 | @@ -7,7 +7,9 @@ |
2031 | __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>' |
2032 | __docformat__ = 'restructuredtext en' |
2033 | |
2034 | -from threading import Lock, Condition, current_thread |
2035 | +from threading import Lock, Condition, current_thread, RLock |
2036 | +from functools import partial |
2037 | +from collections import Counter |
2038 | |
2039 | class LockingError(RuntimeError): |
2040 | pass |
2041 | @@ -37,7 +39,7 @@ |
2042 | l = SHLock() |
2043 | return RWLockWrapper(l), RWLockWrapper(l, is_shared=False) |
2044 | |
2045 | -class SHLock(object): |
2046 | +class SHLock(object): # {{{ |
2047 | ''' |
2048 | Shareable lock class. Used to implement the Multiple readers-single writer |
2049 | paradigm. As best as I can tell, neither writer nor reader starvation |
2050 | @@ -79,6 +81,11 @@ |
2051 | return self._acquire_exclusive(blocking) |
2052 | assert not (self.is_shared and self.is_exclusive) |
2053 | |
def owns_lock(self):
    ''' Return True if the calling thread is the exclusive owner or one of the shared owners. '''
    thread = current_thread()
    with self._lock:
        if self._exclusive_owner is thread:
            return True
        return thread in self._shared_owners
2058 | + |
2059 | def release(self): |
2060 | ''' Release the lock. ''' |
2061 | # This decrements the appropriate lock counters, and if the lock |
2062 | @@ -189,6 +196,8 @@ |
2063 | def _return_waiter(self, waiter): |
2064 | self._free_waiters.append(waiter) |
2065 | |
2066 | +# }}} |
2067 | + |
2068 | class RWLockWrapper(object): |
2069 | |
2070 | def __init__(self, shlock, is_shared=True): |
2071 | @@ -200,16 +209,124 @@ |
2072 | return self |
2073 | |
def __exit__(self, *args):
    # Leaving the with block releases the lock; the exception details
    # passed in are ignored
    release = self.release
    release()
2076 | + |
def release(self):
    ''' Release the wrapped lock. '''
    shlock = self._shlock
    shlock.release()
2079 | |
def owns_lock(self):
    ''' Return whatever the wrapped lock reports from its own owns_lock(). '''
    shlock = self._shlock
    return shlock.owns_lock()
2082 | + |
2083 | +class RecordLock(object): |
2084 | + |
2085 | + ''' |
2086 | + Lock records identified by hashable ids. To use |
2087 | + |
2088 | + rl = RecordLock() |
2089 | + |
2090 | + with rl.lock(some_id): |
2091 | + # do something |
2092 | + |
2093 | + This will lock the record identified by some_id exclusively. The lock is |
2094 | + recursive, which means that you can lock the same record multiple times in |
2095 | + the same thread. |
2096 | + |
2097 | + This class co-operates with the SHLock class. If you try to lock a record |
2098 | + in a thread that already holds the SHLock, a LockingError is raised. This |
2099 | + is to prevent the possibility of a cross-lock deadlock. |
2100 | + |
2101 | + A cross-lock deadlock is still possible if you first lock a record and then |
2102 | + acquire the SHLock, but the usage pattern for this lock makes this highly |
2103 | + unlikely (this lock should be acquired immediately before any file I/O on |
2104 | + files in the library and released immediately after). |
2105 | + ''' |
2106 | + |
2107 | + class Wrap(object): |
2108 | + |
2109 | + def __init__(self, release): |
2110 | + self.release = release |
2111 | + |
2112 | + def __enter__(self): |
2113 | + return self |
2114 | + |
2115 | + def __exit__(self, *args, **kwargs): |
2116 | + self.release() |
2117 | + self.release = None |
2118 | + |
2119 | + def __init__(self, sh_lock): |
2120 | + self._lock = Lock() |
2121 | + # This is for recycling lock objects. |
2122 | + self._free_locks = [RLock()] |
2123 | + self._records = {} |
2124 | + self._counter = Counter() |
2125 | + self.sh_lock = sh_lock |
2126 | + |
2127 | + def lock(self, record_id): |
2128 | + if self.sh_lock.owns_lock(): |
2129 | + raise LockingError('Current thread already holds a shared lock,' |
2130 | + ' you cannot also ask for record lock as this could cause a' |
2131 | + ' deadlock.') |
2132 | + with self._lock: |
2133 | + l = self._records.get(record_id, None) |
2134 | + if l is None: |
2135 | + l = self._take_lock() |
2136 | + self._records[record_id] = l |
2137 | + self._counter[record_id] += 1 |
2138 | + l.acquire() |
2139 | + return RecordLock.Wrap(partial(self.release, record_id)) |
2140 | + |
2141 | + def release(self, record_id): |
2142 | + with self._lock: |
2143 | + l = self._records.pop(record_id, None) |
2144 | + if l is None: |
2145 | + raise LockingError('No lock acquired for record %r'%record_id) |
2146 | + l.release() |
2147 | + self._counter[record_id] -= 1 |
2148 | + if self._counter[record_id] > 0: |
2149 | + self._records[record_id] = l |
2150 | + else: |
2151 | + self._return_lock(l) |
2152 | + |
2153 | + def _take_lock(self): |
2154 | + try: |
2155 | + return self._free_locks.pop() |
2156 | + except IndexError: |
2157 | + return RLock() |
2158 | + |
2159 | + def _return_lock(self, lock): |
2160 | + self._free_locks.append(lock) |
2161 | |
2162 | # Tests {{{ |
2163 | if __name__ == '__main__': |
2164 | import time, random, unittest |
2165 | from threading import Thread |
2166 | |
2167 | - class TestSHLock(unittest.TestCase): |
2168 | - """Testcases for SHLock class.""" |
2169 | + class TestLock(unittest.TestCase): |
2170 | + """Testcases for Lock classes.""" |
2171 | + |
2172 | + def test_owns_locks(self): |
2173 | + lock = SHLock() |
2174 | + self.assertFalse(lock.owns_lock()) |
2175 | + lock.acquire(shared=True) |
2176 | + self.assertTrue(lock.owns_lock()) |
2177 | + lock.release() |
2178 | + self.assertFalse(lock.owns_lock()) |
2179 | + lock.acquire(shared=False) |
2180 | + self.assertTrue(lock.owns_lock()) |
2181 | + lock.release() |
2182 | + self.assertFalse(lock.owns_lock()) |
2183 | + |
2184 | + done = [] |
2185 | + def test(): |
2186 | + if not lock.owns_lock(): |
2187 | + done.append(True) |
2188 | + lock.acquire() |
2189 | + t = Thread(target=test) |
2190 | + t.daemon = True |
2191 | + t.start() |
2192 | + t.join(1) |
2193 | + self.assertEqual(len(done), 1) |
2194 | + lock.release() |
2195 | |
2196 | def test_multithread_deadlock(self): |
2197 | lock = SHLock() |
2198 | @@ -345,8 +462,38 @@ |
2199 | self.assertFalse(lock.is_shared) |
2200 | self.assertFalse(lock.is_exclusive) |
2201 | |
2202 | - |
2203 | - suite = unittest.TestLoader().loadTestsFromTestCase(TestSHLock) |
2204 | + def test_record_lock(self): |
2205 | + shlock = SHLock() |
2206 | + lock = RecordLock(shlock) |
2207 | + |
2208 | + shlock.acquire() |
2209 | + self.assertRaises(LockingError, lock.lock, 1) |
2210 | + shlock.release() |
2211 | + with lock.lock(1): |
2212 | + with lock.lock(1): |
2213 | + pass |
2214 | + |
2215 | + def dolock(): |
2216 | + with lock.lock(1): |
2217 | + time.sleep(0.1) |
2218 | + |
2219 | + t = Thread(target=dolock) |
2220 | + t.daemon = True |
2221 | + with lock.lock(1): |
2222 | + t.start() |
2223 | + t.join(0.2) |
2224 | + self.assertTrue(t.is_alive()) |
2225 | + t.join(0.11) |
2226 | + self.assertFalse(t.is_alive()) |
2227 | + |
2228 | + t = Thread(target=dolock) |
2229 | + t.daemon = True |
2230 | + with lock.lock(2): |
2231 | + t.start() |
2232 | + t.join(0.11) |
2233 | + self.assertFalse(t.is_alive()) |
2234 | + |
2235 | + suite = unittest.TestLoader().loadTestsFromTestCase(TestLock) |
2236 | unittest.TextTestRunner(verbosity=2).run(suite) |
2237 | |
2238 | # }}} |
2239 | |
2240 | === modified file 'src/calibre/db/tables.py' |
2241 | --- src/calibre/db/tables.py 2011-07-03 16:16:09 +0000 |
2242 | +++ src/calibre/db/tables.py 2011-07-19 06:18:03 +0000 |
2243 | @@ -17,6 +17,8 @@ |
2244 | |
2245 | _c_speedup = plugins['speedup'][0] |
2246 | |
2247 | +ONE_ONE, MANY_ONE, MANY_MANY = xrange(3) |
2248 | + |
2249 | def _c_convert_timestamp(val): |
2250 | if not val: |
2251 | return None |
2252 | @@ -57,6 +59,8 @@ |
2253 | timestamp, size, etc. |
2254 | ''' |
2255 | |
2256 | + table_type = ONE_ONE |
2257 | + |
2258 | def read(self, db): |
2259 | self.book_col_map = {} |
2260 | idcol = 'id' if self.metadata['table'] == 'books' else 'book' |
2261 | @@ -73,6 +77,17 @@ |
2262 | 'WHERE data.book=books.id) FROM books'): |
2263 | self.book_col_map[row[0]] = self.unserialize(row[1]) |
2264 | |
2265 | +class CompositeTable(OneToOneTable): |
2266 | + |
2267 | + def read(self, db): |
2268 | + self.book_col_map = {} |
2269 | + d = self.metadata['display'] |
2270 | + self.composite_template = ['composite_template'] |
2271 | + self.contains_html = d['contains_html'] |
2272 | + self.make_category = d['make_category'] |
2273 | + self.composite_sort = d['composite_sort'] |
2274 | + self.use_decorations = d['use_decorations'] |
2275 | + |
2276 | class ManyToOneTable(Table): |
2277 | |
2278 | ''' |
2279 | @@ -82,9 +97,10 @@ |
2280 | Each book however has only one value for data of this type. |
2281 | ''' |
2282 | |
2283 | + table_type = MANY_ONE |
2284 | + |
2285 | def read(self, db): |
2286 | self.id_map = {} |
2287 | - self.extra_map = {} |
2288 | self.col_book_map = {} |
2289 | self.book_col_map = {} |
2290 | self.read_id_maps(db) |
2291 | @@ -105,6 +121,9 @@ |
2292 | self.col_book_map[row[1]].append(row[0]) |
2293 | self.book_col_map[row[0]] = row[1] |
2294 | |
2295 | + for key in tuple(self.col_book_map.iterkeys()): |
2296 | + self.col_book_map[key] = tuple(self.col_book_map[key]) |
2297 | + |
2298 | class ManyToManyTable(ManyToOneTable): |
2299 | |
2300 | ''' |
2301 | @@ -113,6 +132,8 @@ |
2302 | book. For example: tags or authors. |
2303 | ''' |
2304 | |
2305 | + table_type = MANY_MANY |
2306 | + |
2307 | def read_maps(self, db): |
2308 | for row in db.conn.execute( |
2309 | 'SELECT book, {0} FROM {1}'.format( |
2310 | @@ -124,14 +145,21 @@ |
2311 | self.book_col_map[row[0]] = [] |
2312 | self.book_col_map[row[0]].append(row[1]) |
2313 | |
2314 | + for key in tuple(self.col_book_map.iterkeys()): |
2315 | + self.col_book_map[key] = tuple(self.col_book_map[key]) |
2316 | + |
2317 | + for key in tuple(self.book_col_map.iterkeys()): |
2318 | + self.book_col_map[key] = tuple(self.book_col_map[key]) |
2319 | + |
2320 | class AuthorsTable(ManyToManyTable): |
2321 | |
2322 | def read_id_maps(self, db): |
2323 | self.alink_map = {} |
2324 | + self.asort_map = {} |
2325 | for row in db.conn.execute( |
2326 | 'SELECT id, name, sort, link FROM authors'): |
2327 | self.id_map[row[0]] = row[1] |
2328 | - self.extra_map[row[0]] = (row[2] if row[2] else |
2329 | + self.asort_map[row[0]] = (row[2] if row[2] else |
2330 | author_to_author_sort(row[1])) |
2331 | self.alink_map[row[0]] = row[3] |
2332 | |
2333 | @@ -141,14 +169,25 @@ |
2334 | pass |
2335 | |
2336 | def read_maps(self, db): |
2337 | + self.fname_map = {} |
2338 | for row in db.conn.execute('SELECT book, format, name FROM data'): |
2339 | if row[1] is not None: |
2340 | - if row[1] not in self.col_book_map: |
2341 | - self.col_book_map[row[1]] = [] |
2342 | - self.col_book_map[row[1]].append(row[0]) |
2343 | + fmt = row[1].upper() |
2344 | + if fmt not in self.col_book_map: |
2345 | + self.col_book_map[fmt] = [] |
2346 | + self.col_book_map[fmt].append(row[0]) |
2347 | if row[0] not in self.book_col_map: |
2348 | self.book_col_map[row[0]] = [] |
2349 | - self.book_col_map[row[0]].append((row[1], row[2])) |
2350 | + self.book_col_map[row[0]].append(fmt) |
2351 | + if row[0] not in self.fname_map: |
2352 | + self.fname_map[row[0]] = {} |
2353 | + self.fname_map[row[0]][fmt] = row[2] |
2354 | + |
2355 | + for key in tuple(self.col_book_map.iterkeys()): |
2356 | + self.col_book_map[key] = tuple(self.col_book_map[key]) |
2357 | + |
2358 | + for key in tuple(self.book_col_map.iterkeys()): |
2359 | + self.book_col_map[key] = tuple(self.book_col_map[key]) |
2360 | |
2361 | class IdentifiersTable(ManyToManyTable): |
2362 | |
2363 | @@ -162,6 +201,9 @@ |
2364 | self.col_book_map[row[1]] = [] |
2365 | self.col_book_map[row[1]].append(row[0]) |
2366 | if row[0] not in self.book_col_map: |
2367 | - self.book_col_map[row[0]] = [] |
2368 | - self.book_col_map[row[0]].append((row[1], row[2])) |
2369 | + self.book_col_map[row[0]] = {} |
2370 | + self.book_col_map[row[0]][row[1]] = row[2] |
2371 | + |
2372 | + for key in tuple(self.col_book_map.iterkeys()): |
2373 | + self.col_book_map[key] = tuple(self.col_book_map[key]) |
2374 | |
2375 | |
2376 | === added file 'src/calibre/db/view.py' |
2377 | --- src/calibre/db/view.py 1970-01-01 00:00:00 +0000 |
2378 | +++ src/calibre/db/view.py 2011-07-19 06:18:03 +0000 |
2379 | @@ -0,0 +1,109 @@ |
2380 | +#!/usr/bin/env python |
2381 | +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai |
2382 | +from __future__ import (unicode_literals, division, absolute_import, |
2383 | + print_function) |
2384 | + |
2385 | +__license__ = 'GPL v3' |
2386 | +__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>' |
2387 | +__docformat__ = 'restructuredtext en' |
2388 | + |
2389 | +from functools import partial |
2390 | + |
2391 | +def sanitize_sort_field_name(field_metadata, field): |
2392 | + field = field_metadata.search_term_to_field_key(field.lower().strip()) |
2393 | + # translate some fields to their hidden equivalent |
2394 | + field = {'title': 'sort', 'authors':'author_sort'}.get(field, field) |
2395 | + return field |
2396 | + |
2397 | +class View(object): |
2398 | + |
2399 | + def __init__(self, cache): |
2400 | + self.cache = cache |
2401 | + self.marked_ids = {} |
2402 | + self._field_getters = {} |
2403 | + for col, idx in cache.backend.FIELD_MAP.iteritems(): |
2404 | + if isinstance(col, int): |
2405 | + label = self.cache.backend.custom_column_num_map[col]['label'] |
2406 | + label = (self.cache.backend.field_metadata.custom_field_prefix |
2407 | + + label) |
2408 | + self._field_getters[idx] = partial(self.get, label) |
2409 | + else: |
2410 | + try: |
2411 | + self._field_getters[idx] = { |
2412 | + 'id' : self._get_id, |
2413 | + 'au_map' : self.get_author_data, |
2414 | + 'ondevice': self.get_ondevice, |
2415 | + 'marked' : self.get_marked, |
2416 | + }[col] |
2417 | + except KeyError: |
2418 | + self._field_getters[idx] = partial(self.get, col) |
2419 | + |
2420 | + self._map = list(self.cache.all_book_ids()) |
2421 | + self._map_filtered = list(self._map) |
2422 | + |
2423 | + @property |
2424 | + def field_metadata(self): |
2425 | + return self.cache.field_metadata |
2426 | + |
2427 | + def _get_id(self, idx, index_is_id=True): |
2428 | + ans = idx if index_is_id else self.index_to_id(idx) |
2429 | + return ans |
2430 | + |
2431 | + def get_field_map_field(self, row, col, index_is_id=True): |
2432 | + ''' |
2433 | + Supports the legacy FIELD_MAP interface for getting metadata. Do not use |
2434 | + in new code. |
2435 | + ''' |
2436 | + getter = self._field_getters[col] |
2437 | + return getter(row, index_is_id=index_is_id) |
2438 | + |
2439 | + def index_to_id(self, idx): |
2440 | + return self._map_filtered[idx] |
2441 | + |
2442 | + def get(self, field, idx, index_is_id=True, default_value=None): |
2443 | + id_ = idx if index_is_id else self.index_to_id(idx) |
2444 | + return self.cache.field_for(field, id_) |
2445 | + |
2446 | + def get_ondevice(self, idx, index_is_id=True, default_value=''): |
2447 | + id_ = idx if index_is_id else self.index_to_id(idx) |
2448 | + self.cache.field_for('ondevice', id_, default_value=default_value) |
2449 | + |
2450 | + def get_marked(self, idx, index_is_id=True, default_value=None): |
2451 | + id_ = idx if index_is_id else self.index_to_id(idx) |
2452 | + return self.marked_ids.get(id_, default_value) |
2453 | + |
2454 | + def get_author_data(self, idx, index_is_id=True, default_value=()): |
2455 | + ''' |
2456 | + Return author data for all authors of the book identified by idx as a |
2457 | + tuple of dictionaries. The dictionaries should never be empty, unless |
2458 | + there is a bug somewhere. The tuple could be empty if idx points to a |
2459 | + non-existent book, or a book with no authors (though again a book with no |
2460 | + authors should never happen). |
2461 | + |
2462 | + Each dictionary has the keys: name, sort, link. Link can be an empty |
2463 | + string. |
2464 | + |
2465 | + default_value is ignored, this method always returns a tuple |
2466 | + ''' |
2467 | + id_ = idx if index_is_id else self.index_to_id(idx) |
2468 | + with self.cache.read_lock: |
2469 | + ids = self.cache._field_ids_for('authors', id_) |
2470 | + ans = [] |
2471 | + for id_ in ids: |
2472 | + ans.append(self.cache._author_data(id_)) |
2473 | + return tuple(ans) |
2474 | + |
2475 | + def multisort(self, fields=[], subsort=False): |
2476 | + fields = [(sanitize_sort_field_name(self.field_metadata, x), bool(y)) for x, y in fields] |
2477 | + keys = self.field_metadata.sortable_field_keys() |
2478 | + fields = [x for x in fields if x[0] in keys] |
2479 | + if subsort and 'sort' not in [x[0] for x in fields]: |
2480 | + fields += [('sort', True)] |
2481 | + if not fields: |
2482 | + fields = [('timestamp', False)] |
2483 | + |
2484 | + sorted_book_ids = self.cache.multisort(fields) |
2485 | + sorted_book_ids |
2486 | + # TODO: change maps |
2487 | + |
2488 | + |
2489 | |
2490 | === modified file 'src/calibre/devices/android/driver.py' |
2491 | --- src/calibre/devices/android/driver.py 2011-07-10 15:37:25 +0000 |
2492 | +++ src/calibre/devices/android/driver.py 2011-07-19 06:18:03 +0000 |
2493 | @@ -39,7 +39,7 @@ |
2494 | 0x22b8 : { 0x41d9 : [0x216], 0x2d61 : [0x100], 0x2d67 : [0x100], |
2495 | 0x41db : [0x216], 0x4285 : [0x216], 0x42a3 : [0x216], |
2496 | 0x4286 : [0x216], 0x42b3 : [0x216], 0x42b4 : [0x216], |
2497 | - 0x7086 : [0x0226], 0x70a8: [0x9999], |
2498 | + 0x7086 : [0x0226], 0x70a8: [0x9999], 0x42c4 : [0x216], |
2499 | }, |
2500 | |
2501 | # Sony Ericsson |
2502 | @@ -60,6 +60,7 @@ |
2503 | 0x685e : [0x0400], |
2504 | 0x6860 : [0x0400], |
2505 | 0x6877 : [0x0400], |
2506 | + 0x689e : [0x0400], |
2507 | }, |
2508 | |
2509 | # Viewsonic |
2510 | @@ -124,7 +125,8 @@ |
2511 | 'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD', |
2512 | '7', 'A956', 'A955', 'A43', 'ANDROID_PLATFORM', 'TEGRA_2', |
2513 | 'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE', 'A7EB', 'STREAK', |
2514 | - 'MB525', 'ANDROID2.3', 'SGH-I997'] |
2515 | + 'MB525', 'ANDROID2.3', 'SGH-I997', 'GT-I5800_CARD', 'MB612', |
2516 | + 'GT-S5830_CARD'] |
2517 | WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897', |
2518 | 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD', |
2519 | 'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD', |
2520 | |
2521 | === modified file 'src/calibre/devices/kobo/driver.py' |
2522 | --- src/calibre/devices/kobo/driver.py 2011-07-10 00:37:28 +0000 |
2523 | +++ src/calibre/devices/kobo/driver.py 2011-07-19 06:18:03 +0000 |
2524 | @@ -7,6 +7,7 @@ |
2525 | |
2526 | import os |
2527 | import sqlite3 as sqlite |
2528 | +from contextlib import closing |
2529 | |
2530 | from calibre.devices.usbms.books import BookList |
2531 | from calibre.devices.kobo.books import Book |
2532 | @@ -22,7 +23,7 @@ |
2533 | gui_name = 'Kobo Reader' |
2534 | description = _('Communicate with the Kobo Reader') |
2535 | author = 'Timothy Legge' |
2536 | - version = (1, 0, 9) |
2537 | + version = (1, 0, 10) |
2538 | |
2539 | dbversion = 0 |
2540 | fwversion = 0 |
2541 | @@ -48,12 +49,16 @@ |
2542 | |
2543 | VIRTUAL_BOOK_EXTENSIONS = frozenset(['kobo']) |
2544 | |
2545 | - EXTRA_CUSTOMIZATION_MESSAGE = _('The Kobo supports only one collection ' |
2546 | - 'currently: the \"Im_Reading\" list. Create a tag called \"Im_Reading\" ')+\ |
2547 | - 'for automatic management' |
2548 | + EXTRA_CUSTOMIZATION_MESSAGE = [ |
2549 | + _('The Kobo supports several collections including ')+\ |
2550 | + 'Read, Closed, Im_Reading ' +\ |
2551 | + _('Create tags for automatic management'), |
2552 | + ] |
2553 | |
2554 | EXTRA_CUSTOMIZATION_DEFAULT = ', '.join(['tags']) |
2555 | |
2556 | + OPT_COLLECTIONS = 0 |
2557 | + |
2558 | def initialize(self): |
2559 | USBMS.initialize(self) |
2560 | self.book_class = Book |
2561 | @@ -188,77 +193,78 @@ |
2562 | traceback.print_exc() |
2563 | return changed |
2564 | |
2565 | - connection = sqlite.connect(self.normalize_path(self._main_prefix + '.kobo/KoboReader.sqlite')) |
2566 | - |
2567 | - # return bytestrings if the content cannot the decoded as unicode |
2568 | - connection.text_factory = lambda x: unicode(x, "utf-8", "ignore") |
2569 | - |
2570 | - cursor = connection.cursor() |
2571 | - |
2572 | - #query = 'select count(distinct volumeId) from volume_shortcovers' |
2573 | - #cursor.execute(query) |
2574 | - #for row in (cursor): |
2575 | - # numrows = row[0] |
2576 | - #cursor.close() |
2577 | - |
2578 | - # Determine the database version |
2579 | - # 4 - Bluetooth Kobo Rev 2 (1.4) |
2580 | - # 8 - WIFI KOBO Rev 1 |
2581 | - cursor.execute('select version from dbversion') |
2582 | - result = cursor.fetchone() |
2583 | - self.dbversion = result[0] |
2584 | - |
2585 | - debug_print("Database Version: ", self.dbversion) |
2586 | - if self.dbversion >= 16: |
2587 | - query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \ |
2588 | - 'ImageID, ReadStatus, ___ExpirationStatus, FavouritesIndex, Accessibility from content where ' \ |
2589 | - 'BookID is Null and ( ___ExpirationStatus <> "3" or ___ExpirationStatus is Null)' |
2590 | - elif self.dbversion < 16 and self.dbversion >= 14: |
2591 | - query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \ |
2592 | - 'ImageID, ReadStatus, ___ExpirationStatus, FavouritesIndex, "-1" as Accessibility from content where ' \ |
2593 | - 'BookID is Null and ( ___ExpirationStatus <> "3" or ___ExpirationStatus is Null)' |
2594 | - elif self.dbversion < 14 and self.dbversion >= 8: |
2595 | - query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \ |
2596 | - 'ImageID, ReadStatus, ___ExpirationStatus, "-1" as FavouritesIndex, "-1" as Accessibility from content where ' \ |
2597 | - 'BookID is Null and ( ___ExpirationStatus <> "3" or ___ExpirationStatus is Null)' |
2598 | - else: |
2599 | - query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \ |
2600 | - 'ImageID, ReadStatus, "-1" as ___ExpirationStatus, "-1" as FavouritesIndex, "-1" as Accessibility from content where BookID is Null' |
2601 | - |
2602 | - try: |
2603 | - cursor.execute (query) |
2604 | - except Exception as e: |
2605 | - err = str(e) |
2606 | - if not ('___ExpirationStatus' in err or 'FavouritesIndex' in err or |
2607 | - 'Accessibility' in err): |
2608 | - raise |
2609 | - query= ('select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' |
2610 | - 'ImageID, ReadStatus, "-1" as ___ExpirationStatus, "-1" as ' |
2611 | - 'FavouritesIndex, "-1" as Accessibility from content where ' |
2612 | - 'BookID is Null') |
2613 | - cursor.execute(query) |
2614 | - |
2615 | - changed = False |
2616 | - for i, row in enumerate(cursor): |
2617 | - # self.report_progress((i+1) / float(numrows), _('Getting list of books on device...')) |
2618 | - if row[3].startswith("file:///usr/local/Kobo/help/"): |
2619 | - # These are internal to the Kobo device and do not exist |
2620 | - continue |
2621 | - path = self.path_from_contentid(row[3], row[5], row[4], oncard) |
2622 | - mime = mime_type_ext(path_to_ext(path)) if path.find('kepub') == -1 else 'application/epub+zip' |
2623 | - # debug_print("mime:", mime) |
2624 | - |
2625 | - if oncard != 'carda' and oncard != 'cardb' and not row[3].startswith("file:///mnt/sd/"): |
2626 | - changed = update_booklist(self._main_prefix, path, row[0], row[1], mime, row[2], row[5], row[6], row[7], row[4], row[8], row[9], row[10]) |
2627 | - # print "shortbook: " + path |
2628 | - elif oncard == 'carda' and row[3].startswith("file:///mnt/sd/"): |
2629 | - changed = update_booklist(self._card_a_prefix, path, row[0], row[1], mime, row[2], row[5], row[6], row[7], row[4], row[8], row[9], row[10]) |
2630 | - |
2631 | - if changed: |
2632 | - need_sync = True |
2633 | - |
2634 | - cursor.close() |
2635 | - connection.close() |
2636 | + with closing(sqlite.connect( |
2637 | + self.normalize_path(self._main_prefix + |
2638 | + '.kobo/KoboReader.sqlite'))) as connection: |
2639 | + |
2640 | + # return bytestrings if the content cannot be decoded as unicode |
2641 | + connection.text_factory = lambda x: unicode(x, "utf-8", "ignore") |
2642 | + |
2643 | + cursor = connection.cursor() |
2644 | + |
2645 | + #query = 'select count(distinct volumeId) from volume_shortcovers' |
2646 | + #cursor.execute(query) |
2647 | + #for row in (cursor): |
2648 | + # numrows = row[0] |
2649 | + #cursor.close() |
2650 | + |
2651 | + # Determine the database version |
2652 | + # 4 - Bluetooth Kobo Rev 2 (1.4) |
2653 | + # 8 - WIFI KOBO Rev 1 |
2654 | + cursor.execute('select version from dbversion') |
2655 | + result = cursor.fetchone() |
2656 | + self.dbversion = result[0] |
2657 | + |
2658 | + debug_print("Database Version: ", self.dbversion) |
2659 | + if self.dbversion >= 16: |
2660 | + query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \ |
2661 | + 'ImageID, ReadStatus, ___ExpirationStatus, FavouritesIndex, Accessibility from content where ' \ |
2662 | + 'BookID is Null and ( ___ExpirationStatus <> "3" or ___ExpirationStatus is Null)' |
2663 | + elif self.dbversion < 16 and self.dbversion >= 14: |
2664 | + query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \ |
2665 | + 'ImageID, ReadStatus, ___ExpirationStatus, FavouritesIndex, "-1" as Accessibility from content where ' \ |
2666 | + 'BookID is Null and ( ___ExpirationStatus <> "3" or ___ExpirationStatus is Null)' |
2667 | + elif self.dbversion < 14 and self.dbversion >= 8: |
2668 | + query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \ |
2669 | + 'ImageID, ReadStatus, ___ExpirationStatus, "-1" as FavouritesIndex, "-1" as Accessibility from content where ' \ |
2670 | + 'BookID is Null and ( ___ExpirationStatus <> "3" or ___ExpirationStatus is Null)' |
2671 | + else: |
2672 | + query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \ |
2673 | + 'ImageID, ReadStatus, "-1" as ___ExpirationStatus, "-1" as FavouritesIndex, "-1" as Accessibility from content where BookID is Null' |
2674 | + |
2675 | + try: |
2676 | + cursor.execute (query) |
2677 | + except Exception as e: |
2678 | + err = str(e) |
2679 | + if not ('___ExpirationStatus' in err or 'FavouritesIndex' in err or |
2680 | + 'Accessibility' in err): |
2681 | + raise |
2682 | + query= ('select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' |
2683 | + 'ImageID, ReadStatus, "-1" as ___ExpirationStatus, "-1" as ' |
2684 | + 'FavouritesIndex, "-1" as Accessibility from content where ' |
2685 | + 'BookID is Null') |
2686 | + cursor.execute(query) |
2687 | + |
2688 | + changed = False |
2689 | + for i, row in enumerate(cursor): |
2690 | + # self.report_progress((i+1) / float(numrows), _('Getting list of books on device...')) |
2691 | + if row[3].startswith("file:///usr/local/Kobo/help/"): |
2692 | + # These are internal to the Kobo device and do not exist |
2693 | + continue |
2694 | + path = self.path_from_contentid(row[3], row[5], row[4], oncard) |
2695 | + mime = mime_type_ext(path_to_ext(path)) if path.find('kepub') == -1 else 'application/epub+zip' |
2696 | + # debug_print("mime:", mime) |
2697 | + |
2698 | + if oncard != 'carda' and oncard != 'cardb' and not row[3].startswith("file:///mnt/sd/"): |
2699 | + changed = update_booklist(self._main_prefix, path, row[0], row[1], mime, row[2], row[5], row[6], row[7], row[4], row[8], row[9], row[10]) |
2700 | + # print "shortbook: " + path |
2701 | + elif oncard == 'carda' and row[3].startswith("file:///mnt/sd/"): |
2702 | + changed = update_booklist(self._card_a_prefix, path, row[0], row[1], mime, row[2], row[5], row[6], row[7], row[4], row[8], row[9], row[10]) |
2703 | + |
2704 | + if changed: |
2705 | + need_sync = True |
2706 | + |
2707 | + cursor.close() |
2708 | |
2709 | # Remove books that are no longer in the filesystem. Cache contains |
2710 | # indices into the booklist if book not in filesystem, None otherwise |
2711 | @@ -288,56 +294,56 @@ |
2712 | # 2) content |
2713 | |
2714 | debug_print('delete_via_sql: ContentID: ', ContentID, 'ContentType: ', ContentType) |
2715 | - connection = sqlite.connect(self.normalize_path(self._main_prefix + '.kobo/KoboReader.sqlite')) |
2716 | - |
2717 | - # return bytestrings if the content cannot the decoded as unicode |
2718 | - connection.text_factory = lambda x: unicode(x, "utf-8", "ignore") |
2719 | - |
2720 | - cursor = connection.cursor() |
2721 | - t = (ContentID,) |
2722 | - cursor.execute('select ImageID from content where ContentID = ?', t) |
2723 | - |
2724 | - ImageID = None |
2725 | - for row in cursor: |
2726 | - # First get the ImageID to delete the images |
2727 | - ImageID = row[0] |
2728 | - cursor.close() |
2729 | - |
2730 | - cursor = connection.cursor() |
2731 | - if ContentType == 6 and self.dbversion < 8: |
2732 | - # Delete the shortcover_pages first |
2733 | - cursor.execute('delete from shortcover_page where shortcoverid in (select ContentID from content where BookID = ?)', t) |
2734 | - |
2735 | - #Delete the volume_shortcovers second |
2736 | - cursor.execute('delete from volume_shortcovers where volumeid = ?', t) |
2737 | - |
2738 | - # Delete the rows from content_keys |
2739 | - if self.dbversion >= 8: |
2740 | - cursor.execute('delete from content_keys where volumeid = ?', t) |
2741 | - |
2742 | - # Delete the chapters associated with the book next |
2743 | - t = (ContentID,) |
2744 | - # Kobo does not delete the Book row (ie the row where the BookID is Null) |
2745 | - # The next server sync should remove the row |
2746 | - cursor.execute('delete from content where BookID = ?', t) |
2747 | - try: |
2748 | - cursor.execute('update content set ReadStatus=0, FirstTimeReading = \'true\', ___PercentRead=0, ___ExpirationStatus=3 ' \ |
2749 | - 'where BookID is Null and ContentID =?',t) |
2750 | - except Exception as e: |
2751 | - if 'no such column' not in str(e): |
2752 | - raise |
2753 | - cursor.execute('update content set ReadStatus=0, FirstTimeReading = \'true\', ___PercentRead=0 ' \ |
2754 | - 'where BookID is Null and ContentID =?',t) |
2755 | - |
2756 | - |
2757 | - connection.commit() |
2758 | - |
2759 | - cursor.close() |
2760 | - if ImageID == None: |
2761 | - print "Error condition ImageID was not found" |
2762 | - print "You likely tried to delete a book that the kobo has not yet added to the database" |
2763 | - |
2764 | - connection.close() |
2765 | + with closing(sqlite.connect(self.normalize_path(self._main_prefix + |
2766 | + '.kobo/KoboReader.sqlite'))) as connection: |
2767 | + |
2768 | + # return bytestrings if the content cannot be decoded as unicode |
2769 | + connection.text_factory = lambda x: unicode(x, "utf-8", "ignore") |
2770 | + |
2771 | + cursor = connection.cursor() |
2772 | + t = (ContentID,) |
2773 | + cursor.execute('select ImageID from content where ContentID = ?', t) |
2774 | + |
2775 | + ImageID = None |
2776 | + for row in cursor: |
2777 | + # First get the ImageID to delete the images |
2778 | + ImageID = row[0] |
2779 | + cursor.close() |
2780 | + |
2781 | + cursor = connection.cursor() |
2782 | + if ContentType == 6 and self.dbversion < 8: |
2783 | + # Delete the shortcover_pages first |
2784 | + cursor.execute('delete from shortcover_page where shortcoverid in (select ContentID from content where BookID = ?)', t) |
2785 | + |
2786 | + #Delete the volume_shortcovers second |
2787 | + cursor.execute('delete from volume_shortcovers where volumeid = ?', t) |
2788 | + |
2789 | + # Delete the rows from content_keys |
2790 | + if self.dbversion >= 8: |
2791 | + cursor.execute('delete from content_keys where volumeid = ?', t) |
2792 | + |
2793 | + # Delete the chapters associated with the book next |
2794 | + t = (ContentID,) |
2795 | + # Kobo does not delete the Book row (ie the row where the BookID is Null) |
2796 | + # The next server sync should remove the row |
2797 | + cursor.execute('delete from content where BookID = ?', t) |
2798 | + try: |
2799 | + cursor.execute('update content set ReadStatus=0, FirstTimeReading = \'true\', ___PercentRead=0, ___ExpirationStatus=3 ' \ |
2800 | + 'where BookID is Null and ContentID =?',t) |
2801 | + except Exception as e: |
2802 | + if 'no such column' not in str(e): |
2803 | + raise |
2804 | + cursor.execute('update content set ReadStatus=0, FirstTimeReading = \'true\', ___PercentRead=0 ' \ |
2805 | + 'where BookID is Null and ContentID =?',t) |
2806 | + |
2807 | + |
2808 | + connection.commit() |
2809 | + |
2810 | + cursor.close() |
2811 | + if ImageID == None: |
2812 | + print "Error condition ImageID was not found" |
2813 | + print "You likely tried to delete a book that the kobo has not yet added to the database" |
2814 | + |
2815 | # If all this succeeds we need to delete the images files via the ImageID |
2816 | return ImageID |
2817 | |
2818 | @@ -664,50 +670,49 @@ |
2819 | # Needs to be outside books collection as in the case of removing |
2820 | # the last book from the collection the list of books is empty |
2821 | # and the removal of the last book would not occur |
2822 | - connection = sqlite.connect(self.normalize_path(self._main_prefix + '.kobo/KoboReader.sqlite')) |
2823 | - |
2824 | - # return bytestrings if the content cannot the decoded as unicode |
2825 | - connection.text_factory = lambda x: unicode(x, "utf-8", "ignore") |
2826 | - |
2827 | - if collections: |
2828 | - |
2829 | - # Need to reset the collections outside the particular loops |
2830 | - # otherwise the last item will not be removed |
2831 | - self.reset_readstatus(connection, oncard) |
2832 | - if self.dbversion >= 14: |
2833 | - self.reset_favouritesindex(connection, oncard) |
2834 | - |
2835 | - # Process any collections that exist |
2836 | - for category, books in collections.items(): |
2837 | - debug_print("Category: ", category, " id = ", readstatuslist.get(category)) |
2838 | - for book in books: |
2839 | - debug_print(' Title:', book.title, 'category: ', category) |
2840 | - if category not in book.device_collections: |
2841 | - book.device_collections.append(category) |
2842 | - |
2843 | - extension = os.path.splitext(book.path)[1] |
2844 | - ContentType = self.get_content_type_from_extension(extension) if extension != '' else self.get_content_type_from_path(book.path) |
2845 | - |
2846 | - ContentID = self.contentid_from_path(book.path, ContentType) |
2847 | - |
2848 | - if category in readstatuslist.keys(): |
2849 | - # Manage ReadStatus |
2850 | - self.set_readstatus(connection, ContentID, readstatuslist.get(category)) |
2851 | - if category == 'Shortlist' and self.dbversion >= 14: |
2852 | - # Manage FavouritesIndex/Shortlist |
2853 | - self.set_favouritesindex(connection, ContentID) |
2854 | - if category in accessibilitylist.keys(): |
2855 | - # Do not manage the Accessibility List |
2856 | - pass |
2857 | - else: # No collections |
2858 | - # Since no collections exist the ReadStatus needs to be reset to 0 (Unread) |
2859 | - debug_print("No Collections - reseting ReadStatus") |
2860 | - self.reset_readstatus(connection, oncard) |
2861 | - if self.dbversion >= 14: |
2862 | - debug_print("No Collections - reseting FavouritesIndex") |
2863 | - self.reset_favouritesindex(connection, oncard) |
2864 | - |
2865 | - connection.close() |
2866 | + with closing(sqlite.connect(self.normalize_path(self._main_prefix + |
2867 | + '.kobo/KoboReader.sqlite'))) as connection: |
2868 | + |
2869 | + # return bytestrings if the content cannot be decoded as unicode |
2870 | + connection.text_factory = lambda x: unicode(x, "utf-8", "ignore") |
2871 | + |
2872 | + if collections: |
2873 | + |
2874 | + # Need to reset the collections outside the particular loops |
2875 | + # otherwise the last item will not be removed |
2876 | + self.reset_readstatus(connection, oncard) |
2877 | + if self.dbversion >= 14: |
2878 | + self.reset_favouritesindex(connection, oncard) |
2879 | + |
2880 | + # Process any collections that exist |
2881 | + for category, books in collections.items(): |
2882 | + debug_print("Category: ", category, " id = ", readstatuslist.get(category)) |
2883 | + for book in books: |
2884 | + debug_print(' Title:', book.title, 'category: ', category) |
2885 | + if category not in book.device_collections: |
2886 | + book.device_collections.append(category) |
2887 | + |
2888 | + extension = os.path.splitext(book.path)[1] |
2889 | + ContentType = self.get_content_type_from_extension(extension) if extension != '' else self.get_content_type_from_path(book.path) |
2890 | + |
2891 | + ContentID = self.contentid_from_path(book.path, ContentType) |
2892 | + |
2893 | + if category in readstatuslist.keys(): |
2894 | + # Manage ReadStatus |
2895 | + self.set_readstatus(connection, ContentID, readstatuslist.get(category)) |
2896 | + elif category == 'Shortlist' and self.dbversion >= 14: |
2897 | + # Manage FavouritesIndex/Shortlist |
2898 | + self.set_favouritesindex(connection, ContentID) |
2899 | + elif category in accessibilitylist.keys(): |
2900 | + # Do not manage the Accessibility List |
2901 | + pass |
2902 | + else: # No collections |
2903 | + # Since no collections exist the ReadStatus needs to be reset to 0 (Unread) |
2904 | + debug_print("No Collections - reseting ReadStatus") |
2905 | + self.reset_readstatus(connection, oncard) |
2906 | + if self.dbversion >= 14: |
2907 | + debug_print("No Collections - reseting FavouritesIndex") |
2908 | + self.reset_favouritesindex(connection, oncard) |
2909 | |
2910 | # debug_print('Finished update_device_database_collections', collections_attributes) |
2911 | |
2912 | @@ -723,7 +728,7 @@ |
2913 | opts = self.settings() |
2914 | if opts.extra_customization: |
2915 | collections = [x.lower().strip() for x in |
2916 | - opts.extra_customization.split(',')] |
2917 | + opts.extra_customization[self.OPT_COLLECTIONS].split(',')] |
2918 | else: |
2919 | collections = [] |
2920 | |
2921 | |
2922 | === modified file 'src/calibre/devices/usbms/device.py' |
2923 | --- src/calibre/devices/usbms/device.py 2011-06-27 20:59:32 +0000 |
2924 | +++ src/calibre/devices/usbms/device.py 2011-07-19 06:18:03 +0000 |
2925 | @@ -1077,8 +1077,13 @@ |
2926 | settings = self.settings() |
2927 | template = self.save_template() |
2928 | if mdata.tags and _('News') in mdata.tags: |
2929 | - today = time.localtime() |
2930 | - template = "{title}_%d-%d-%d" % (today[0], today[1], today[2]) |
2931 | + try: |
2932 | + p = mdata.pubdate |
2933 | + date = (p.year, p.month, p.day) |
2934 | + except: |
2935 | + today = time.localtime() |
2936 | + date = (today[0], today[1], today[2]) |
2937 | + template = "{title}_%d-%d-%d" % date |
2938 | use_subdirs = self.SUPPORTS_SUB_DIRS and settings.use_subdirs |
2939 | |
2940 | fname = sanitize(fname) |
2941 | |
2942 | === modified file 'src/calibre/devices/usbms/driver.py' |
2943 | --- src/calibre/devices/usbms/driver.py 2011-06-09 16:10:44 +0000 |
2944 | +++ src/calibre/devices/usbms/driver.py 2011-07-19 06:18:03 +0000 |
2945 | @@ -94,11 +94,29 @@ |
2946 | self.report_progress(1.0, _('Get device information...')) |
2947 | self.driveinfo = {} |
2948 | if self._main_prefix is not None: |
2949 | - self.driveinfo['main'] = self._update_driveinfo_file(self._main_prefix, 'main') |
2950 | - if self._card_a_prefix is not None: |
2951 | - self.driveinfo['A'] = self._update_driveinfo_file(self._card_a_prefix, 'A') |
2952 | - if self._card_b_prefix is not None: |
2953 | - self.driveinfo['B'] = self._update_driveinfo_file(self._card_b_prefix, 'B') |
2954 | + try: |
2955 | + self.driveinfo['main'] = self._update_driveinfo_file(self._main_prefix, 'main') |
2956 | + except (IOError, OSError) as e: |
2957 | + raise IOError(_('Failed to access files in the main memory of' |
2958 | + ' your device. You should contact the device' |
2959 | + ' manufacturer for support. Common fixes are:' |
2960 | + ' try a different USB cable/USB port on your computer.' |
2961 | + ' If you device has a "Reset to factory defaults" type' |
2962 | + ' of setting somewhere, use it. Underlying error: %s') |
2963 | + % e) |
2964 | + try: |
2965 | + if self._card_a_prefix is not None: |
2966 | + self.driveinfo['A'] = self._update_driveinfo_file(self._card_a_prefix, 'A') |
2967 | + if self._card_b_prefix is not None: |
2968 | + self.driveinfo['B'] = self._update_driveinfo_file(self._card_b_prefix, 'B') |
2969 | + except (IOError, OSError) as e: |
2970 | + raise IOError(_('Failed to access files on the SD card in your' |
2971 | + ' device. This can happen for many reasons. The SD card may be' |
2972 | + ' corrupted, it may be too large for your device, it may be' |
2973 | + ' write-protected, etc. Try a different SD card, or reformat' |
2974 | + ' your SD card using the FAT32 filesystem. Also make sure' |
2975 | + ' there are not too many files in the root of your SD card.' |
2976 | + ' Underlying error: %s') % e) |
2977 | return (self.get_gui_name(), '', '', '', self.driveinfo) |
2978 | |
2979 | def set_driveinfo_name(self, location_code, name): |
2980 | |
2981 | === modified file 'src/calibre/ebooks/__init__.py' |
2982 | --- src/calibre/ebooks/__init__.py 2011-04-21 19:40:56 +0000 |
2983 | +++ src/calibre/ebooks/__init__.py 2011-07-19 06:18:03 +0000 |
2984 | @@ -159,7 +159,7 @@ |
2985 | return x |
2986 | |
2987 | def calibre_cover(title, author_string, series_string=None, |
2988 | - output_format='jpg', title_size=46, author_size=36): |
2989 | + output_format='jpg', title_size=46, author_size=36, logo_path=None): |
2990 | title = normalize(title) |
2991 | author_string = normalize(author_string) |
2992 | series_string = normalize(series_string) |
2993 | @@ -167,7 +167,9 @@ |
2994 | lines = [TextLine(title, title_size), TextLine(author_string, author_size)] |
2995 | if series_string: |
2996 | lines.append(TextLine(series_string, author_size)) |
2997 | - return create_cover_page(lines, I('library.png'), output_format='jpg') |
2998 | + if logo_path is None: |
2999 | + logo_path = I('library.png') |
3000 | + return create_cover_page(lines, logo_path, output_format='jpg') |
3001 | |
3002 | UNIT_RE = re.compile(r'^(-*[0-9]*[.]?[0-9]*)\s*(%|em|ex|en|px|mm|cm|in|pt|pc)$') |
3003 | |
3004 | |
3005 | === modified file 'src/calibre/ebooks/chardet/__init__.py' |
3006 | --- src/calibre/ebooks/chardet/__init__.py 2011-04-02 16:40:58 +0000 |
3007 | +++ src/calibre/ebooks/chardet/__init__.py 2011-07-19 06:18:03 +0000 |
3008 | @@ -38,8 +38,12 @@ |
3009 | ENTITY_PATTERN = re.compile(r'&(\S+?);') |
3010 | |
3011 | def strip_encoding_declarations(raw): |
3012 | + limit = 50*1024 |
3013 | for pat in ENCODING_PATS: |
3014 | - raw = pat.sub('', raw) |
3015 | + prefix = raw[:limit] |
3016 | + suffix = raw[limit:] |
3017 | + prefix = pat.sub('', prefix) |
3018 | + raw = prefix + suffix |
3019 | return raw |
3020 | |
3021 | def substitute_entites(raw): |
3022 | |
3023 | === modified file 'src/calibre/ebooks/conversion/cli.py' |
3024 | --- src/calibre/ebooks/conversion/cli.py 2011-07-10 19:09:11 +0000 |
3025 | +++ src/calibre/ebooks/conversion/cli.py 2011-07-19 06:18:03 +0000 |
3026 | @@ -137,7 +137,9 @@ |
3027 | 'extra_css', 'smarten_punctuation', |
3028 | 'margin_top', 'margin_left', 'margin_right', |
3029 | 'margin_bottom', 'change_justification', |
3030 | - 'insert_blank_line', 'remove_paragraph_spacing','remove_paragraph_spacing_indent_size', |
3031 | + 'insert_blank_line', 'insert_blank_line_size', |
3032 | + 'remove_paragraph_spacing', |
3033 | + 'remove_paragraph_spacing_indent_size', |
3034 | 'asciiize', |
3035 | ] |
3036 | ), |
3037 | |
3038 | === modified file 'src/calibre/ebooks/conversion/plumber.py' |
3039 | --- src/calibre/ebooks/conversion/plumber.py 2011-07-06 18:10:08 +0000 |
3040 | +++ src/calibre/ebooks/conversion/plumber.py 2011-07-19 06:18:03 +0000 |
3041 | @@ -366,9 +366,9 @@ |
3042 | |
3043 | OptionRecommendation(name='remove_paragraph_spacing_indent_size', |
3044 | recommended_value=1.5, level=OptionRecommendation.LOW, |
3045 | - help=_('When calibre removes inter paragraph spacing, it automatically ' |
3046 | + help=_('When calibre removes blank lines between paragraphs, it automatically ' |
3047 | 'sets a paragraph indent, to ensure that paragraphs can be easily ' |
3048 | - 'distinguished. This option controls the width of that indent.') |
3049 | + 'distinguished. This option controls the width of that indent (in em).') |
3050 | ), |
3051 | |
3052 | OptionRecommendation(name='prefer_metadata_cover', |
3053 | @@ -384,6 +384,13 @@ |
3054 | ) |
3055 | ), |
3056 | |
3057 | +OptionRecommendation(name='insert_blank_line_size', |
3058 | + recommended_value=0.5, level=OptionRecommendation.LOW, |
3059 | + help=_('Set the height of the inserted blank lines (in em).' |
3060 | + ' The height of the lines between paragraphs will be twice the value' |
3061 | + ' set here.') |
3062 | + ), |
3063 | + |
3064 | OptionRecommendation(name='remove_first_image', |
3065 | recommended_value=False, level=OptionRecommendation.LOW, |
3066 | help=_('Remove the first image from the input ebook. Useful if the ' |
3067 | @@ -602,7 +609,7 @@ |
3068 | input_fmt = os.path.splitext(self.input)[1] |
3069 | if not input_fmt: |
3070 | raise ValueError('Input file must have an extension') |
3071 | - input_fmt = input_fmt[1:].lower() |
3072 | + input_fmt = input_fmt[1:].lower().replace('original_', '') |
3073 | self.archive_input_tdir = None |
3074 | if input_fmt in ARCHIVE_FMTS: |
3075 | self.log('Processing archive...') |
3076 | @@ -1048,6 +1055,7 @@ |
3077 | with self.output_plugin: |
3078 | self.output_plugin.convert(self.oeb, self.output, self.input_plugin, |
3079 | self.opts, self.log) |
3080 | + self.oeb.clean_temp_files() |
3081 | self.ui_reporter(1.) |
3082 | run_plugins_on_postprocess(self.output, self.output_fmt) |
3083 | |
3084 | |
3085 | === modified file 'src/calibre/ebooks/htmlz/input.py' |
3086 | --- src/calibre/ebooks/htmlz/input.py 2011-07-09 03:21:21 +0000 |
3087 | +++ src/calibre/ebooks/htmlz/input.py 2011-07-19 06:18:03 +0000 |
3088 | @@ -8,7 +8,7 @@ |
3089 | |
3090 | import os |
3091 | |
3092 | -from calibre import guess_type, walk |
3093 | +from calibre import guess_type |
3094 | from calibre.customize.conversion import InputFormatPlugin |
3095 | from calibre.ebooks.chardet import xml_to_unicode |
3096 | from calibre.ebooks.metadata.opf2 import OPF |
3097 | @@ -25,16 +25,50 @@ |
3098 | accelerators): |
3099 | self.log = log |
3100 | html = u'' |
3101 | + top_levels = [] |
3102 | |
3103 | # Extract content from zip archive. |
3104 | zf = ZipFile(stream) |
3105 | zf.extractall() |
3106 | |
3107 | - for x in walk('.'): |
3108 | + # Find the HTML file in the archive. It needs to be |
3109 | + # top level. |
3110 | + index = u'' |
3111 | + multiple_html = False |
3112 | + # Get a list of all top level files in the archive. |
3113 | + for x in os.listdir('.'): |
3114 | + if os.path.isfile(x): |
3115 | + top_levels.append(x) |
3116 | + # Try to find an index file (index.html, index.xhtml or index.htm). |
3117 | + for x in top_levels: |
3118 | + if x.lower() in ('index.html', 'index.xhtml', 'index.htm'): |
3119 | + index = x |
3120 | + break |
3121 | + # Look for multiple HTML files in the archive. We look at the |
3122 | + # top level files only as only they matter in HTMLZ. |
3123 | + for x in top_levels: |
3124 | if os.path.splitext(x)[1].lower() in ('.html', '.xhtml', '.htm'): |
3125 | - with open(x, 'rb') as tf: |
3126 | - html = tf.read() |
3127 | - break |
3128 | + # Set index to the first HTML file found if it's not |
3129 | + # called index. |
3130 | + if not index: |
3131 | + index = x |
3132 | + else: |
3133 | + multiple_html = True |
3134 | + # Warn the user if there are multiple HTML files in the archive. HTMLZ |
3135 | + # supports a single HTML file. A conversion with a multiple HTML file |
3136 | + # HTMLZ archive probably won't turn out as the user expects. With |
3137 | + # multiple HTML files, ZIP input should be used in place of HTMLZ. |
3138 | + if multiple_html: |
3139 | + log.warn(_('Multiple HTML files found in the archive. Only %s will be used.') % index) |
3140 | + |
3141 | + if index: |
3142 | + with open(index, 'rb') as tf: |
3143 | + html = tf.read() |
3144 | + else: |
3145 | + raise Exception(_('No top level HTML file found.')) |
3146 | + |
3147 | + if not html: |
3148 | + raise Exception(_('Top level HTML file %s is empty') % index) |
3149 | |
3150 | # Encoding |
3151 | if options.input_encoding: |
3152 | @@ -75,7 +109,7 @@ |
3153 | # Get the cover path from the OPF. |
3154 | cover_path = None |
3155 | opf = None |
3156 | - for x in walk('.'): |
3157 | + for x in top_levels: |
3158 | if os.path.splitext(x)[1].lower() in ('.opf'): |
3159 | opf = x |
3160 | break |
3161 | |
3162 | === modified file 'src/calibre/ebooks/metadata/book/base.py' |
3163 | --- src/calibre/ebooks/metadata/book/base.py 2011-07-10 19:29:15 +0000 |
3164 | +++ src/calibre/ebooks/metadata/book/base.py 2011-07-19 06:18:03 +0000 |
3165 | @@ -742,7 +742,7 @@ |
3166 | ans += [('ISBN', unicode(self.isbn))] |
3167 | ans += [(_('Tags'), u', '.join([unicode(t) for t in self.tags]))] |
3168 | if self.series: |
3169 | - ans += [_('Series'), unicode(self.series) + ' #%s'%self.format_series_index()] |
3170 | + ans += [(_('Series'), unicode(self.series) + ' #%s'%self.format_series_index())] |
3171 | ans += [(_('Language'), unicode(self.language))] |
3172 | if self.timestamp is not None: |
3173 | ans += [(_('Timestamp'), unicode(self.timestamp.isoformat(' ')))] |
3174 | |
3175 | === modified file 'src/calibre/ebooks/mobi/debug.py' |
3176 | --- src/calibre/ebooks/mobi/debug.py 2011-04-16 02:11:05 +0000 |
3177 | +++ src/calibre/ebooks/mobi/debug.py 2011-07-19 06:18:03 +0000 |
3178 | @@ -7,10 +7,11 @@ |
3179 | __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>' |
3180 | __docformat__ = 'restructuredtext en' |
3181 | |
3182 | -import struct, datetime |
3183 | +import struct, datetime, sys, os |
3184 | from calibre.utils.date import utc_tz |
3185 | from calibre.ebooks.mobi.langcodes import main_language, sub_language |
3186 | |
3187 | +# PalmDB {{{ |
3188 | class PalmDOCAttributes(object): |
3189 | |
3190 | class Attr(object): |
3191 | @@ -94,8 +95,9 @@ |
3192 | ans.append('Number of records: %s'%self.number_of_records) |
3193 | |
3194 | return '\n'.join(ans) |
3195 | +# }}} |
3196 | |
3197 | -class Record(object): |
3198 | +class Record(object): # {{{ |
3199 | |
3200 | def __init__(self, raw, header): |
3201 | self.offset, self.flags, self.uid = header |
3202 | @@ -103,9 +105,11 @@ |
3203 | |
3204 | @property |
3205 | def header(self): |
3206 | - return 'Offset: %d Flags: %d UID: %d'%(self.offset, self.flags, |
3207 | - self.uid) |
3208 | + return 'Offset: %d Flags: %d UID: %d First 4 bytes: %r Size: %d'%(self.offset, self.flags, |
3209 | + self.uid, self.raw[:4], len(self.raw)) |
3210 | +# }}} |
3211 | |
3212 | +# EXTH {{{ |
3213 | class EXTHRecord(object): |
3214 | |
3215 | def __init__(self, type_, data): |
3216 | @@ -189,9 +193,9 @@ |
3217 | for r in self.records: |
3218 | ans.append(str(r)) |
3219 | return '\n'.join(ans) |
3220 | - |
3221 | - |
3222 | -class MOBIHeader(object): |
3223 | +# }}} |
3224 | + |
3225 | +class MOBIHeader(object): # {{{ |
3226 | |
3227 | def __init__(self, record0): |
3228 | self.raw = record0.raw |
3229 | @@ -311,7 +315,8 @@ |
3230 | ans.append('Secondary index record: %d (null val: %d)'%( |
3231 | self.secondary_index_record, 0xffffffff)) |
3232 | ans.append('Reserved2: %r'%self.reserved2) |
3233 | - ans.append('First non-book record: %d'% self.first_non_book_record) |
3234 | + ans.append('First non-book record (null value: %d): %d'%(0xffffffff, |
3235 | + self.first_non_book_record)) |
3236 | ans.append('Full name offset: %d'%self.fullname_offset) |
3237 | ans.append('Full name length: %d bytes'%self.fullname_length) |
3238 | ans.append('Langcode: %r'%self.locale_raw) |
3239 | @@ -342,7 +347,8 @@ |
3240 | ans.append('FLIS count: %d'% self.flis_count) |
3241 | ans.append('Unknown6: %r'% self.unknown6) |
3242 | ans.append('Extra data flags: %r'%self.extra_data_flags) |
3243 | - ans.append('Primary index record: %d'%self.primary_index_record) |
3244 | + ans.append('Primary index record (null value: %d): %d'%(0xffffffff, |
3245 | + self.primary_index_record)) |
3246 | |
3247 | ans = '\n'.join(ans) |
3248 | |
3249 | @@ -355,8 +361,134 @@ |
3250 | |
3251 | ans += '\nRecord 0 length: %d'%len(self.raw) |
3252 | return ans |
3253 | - |
3254 | -class MOBIFile(object): |
3255 | +# }}} |
3256 | + |
3257 | +class TagX(object): # {{{ |
3258 | + |
3259 | + def __init__(self, raw, control_byte_count): |
3260 | + self.tag = ord(raw[0]) |
3261 | + self.num_values = ord(raw[1]) |
3262 | + self.bmask = ord(raw[2]) |
3263 | + self.bitmask = bin(self.bmask) |
3264 | + # End of file = 1 iff last entry |
3265 | + # When it is 1 all others are 0 |
3266 | + self.eof = ord(raw[3]) |
3267 | + |
3268 | + self.is_eof = (self.eof == 1 and self.tag == 0 and self.num_values == 0 |
3269 | + and self.bmask == 0) |
3270 | + |
3271 | + def __repr__(self): |
3272 | + return 'TAGX(tag=%02d, num_values=%d, bitmask=%r (%d), eof=%d)' % (self.tag, |
3273 | + self.num_values, self.bitmask, self.bmask, self.eof) |
3274 | + # }}} |
3275 | + |
3276 | +class PrimaryIndexRecord(object): # {{{ |
3277 | + |
3278 | + def __init__(self, record): |
3279 | + self.record = record |
3280 | + raw = self.record.raw |
3281 | + if raw[:4] != b'INDX': |
3282 | + raise ValueError('Invalid Primary Index Record') |
3283 | + |
3284 | + self.header_length, = struct.unpack('>I', raw[4:8]) |
3285 | + self.unknown1 = raw[8:16] |
3286 | + self.index_type, = struct.unpack('>I', raw[16:20]) |
3287 | + self.index_type_desc = {0: 'normal', 2: |
3288 | + 'inflection'}.get(self.index_type, 'unknown') |
3289 | + self.idxt_start, = struct.unpack('>I', raw[20:24]) |
3290 | + self.index_count, = struct.unpack('>I', raw[24:28]) |
3291 | + self.index_encoding_num, = struct.unpack('>I', raw[28:32]) |
3292 | + self.index_encoding = {65001: 'utf-8', 1252: |
3293 | + 'cp1252'}.get(self.index_encoding_num, 'unknown') |
3294 | + if self.index_encoding == 'unknown': |
3295 | + raise ValueError( |
3296 | + 'Unknown index encoding: %d'%self.index_encoding_num) |
3297 | + self.locale_raw, = struct.unpack(b'>I', raw[32:36]) |
3298 | + langcode = self.locale_raw |
3299 | + langid = langcode & 0xFF |
3300 | + sublangid = (langcode >> 10) & 0xFF |
3301 | + self.language = main_language.get(langid, 'ENGLISH') |
3302 | + self.sublanguage = sub_language.get(sublangid, 'NEUTRAL') |
3303 | + self.num_index_entries, = struct.unpack('>I', raw[36:40]) |
3304 | + self.ordt_start, = struct.unpack('>I', raw[40:44]) |
3305 | + self.ligt_start, = struct.unpack('>I', raw[44:48]) |
3306 | + self.num_of_ligt_entries, = struct.unpack('>I', raw[48:52]) |
3307 | + self.num_of_ctoc_blocks, = struct.unpack('>I', raw[52:56]) |
3308 | + self.unknown2 = raw[56:180] |
3309 | + self.tagx_offset, = struct.unpack(b'>I', raw[180:184]) |
3310 | + if self.tagx_offset != self.header_length: |
3311 | + raise ValueError('TAGX offset and header length disagree') |
3312 | + self.unknown3 = raw[184:self.header_length] |
3313 | + |
3314 | + tagx = raw[self.header_length:] |
3315 | + if not tagx.startswith(b'TAGX'): |
3316 | + raise ValueError('Invalid TAGX section') |
3317 | + self.tagx_header_length, = struct.unpack('>I', tagx[4:8]) |
3318 | + self.tagx_control_byte_count, = struct.unpack('>I', tagx[8:12]) |
3319 | + tag_table = tagx[12:self.tagx_header_length] |
3320 | + if len(tag_table) % 4 != 0: |
3321 | + raise ValueError('Invalid Tag table') |
3322 | + num_tagx_entries = len(tag_table) // 4 |
3323 | + self.tagx_entries = [] |
3324 | + for i in range(num_tagx_entries): |
3325 | + self.tagx_entries.append(TagX(tag_table[i*4:(i+1)*4], |
3326 | + self.tagx_control_byte_count)) |
3327 | + if self.tagx_entries and not self.tagx_entries[-1].is_eof: |
3328 | + raise ValueError('TAGX last entry is not EOF') |
3329 | + |
3330 | + idxt0_pos = self.header_length+self.tagx_header_length |
3331 | + last_name_len, = struct.unpack(b'>B', raw[idxt0_pos]) |
3332 | + count_pos = idxt0_pos+1+last_name_len |
3333 | + last_num = int(raw[idxt0_pos+1:count_pos], 16) |
3334 | + self.ncx_count, = struct.unpack(b'>H', raw[count_pos:count_pos+2]) |
3335 | + |
3336 | + if last_num != self.ncx_count - 1: |
3337 | + raise ValueError('Last id number in the NCX != NCX count - 1') |
3338 | + # There may be some alignment zero bytes between the end of the idxt0 |
3339 | + # and self.idxt_start |
3340 | + |
3341 | + idxt = raw[self.idxt_start:] |
3342 | + if idxt[:4] != b'IDXT': |
3343 | + raise ValueError('Invalid IDXT header') |
3344 | + length_check, = struct.unpack(b'>H', idxt[4:6]) |
3345 | + if length_check != self.header_length + self.tagx_header_length: |
3346 | + raise ValueError('Length check failed') |
3347 | + |
3348 | + def __str__(self): |
3349 | + ans = ['*'*20 + ' Index Header '+ '*'*20] |
3350 | + a = ans.append |
3351 | + a('Header length: %d'%self.header_length) |
3352 | + a('Unknown1: %r (%d bytes) (All zeros: %r)'%(self.unknown1, |
3353 | + len(self.unknown1), not bool(self.unknown1.replace(b'\0', '')) )) |
3354 | + a('Index Type: %s (%d)'%(self.index_type_desc, self.index_type)) |
3355 | + a('Offset to IDXT start: %d'%self.idxt_start) |
3356 | + a('Number of index records: %d'%self.index_count) |
3357 | + a('Index encoding: %s (%d)'%(self.index_encoding, |
3358 | + self.index_encoding_num)) |
3359 | + a('Index language: %s - %s (%s)'%(self.language, self.sublanguage, |
3360 | + hex(self.locale_raw))) |
3361 | + a('Number of index entries: %d'% self.num_index_entries) |
3362 | + a('ORDT start: %d'%self.ordt_start) |
3363 | + a('LIGT start: %d'%self.ligt_start) |
3364 | + a('Number of LIGT entries: %d'%self.num_of_ligt_entries) |
3365 | + a('Number of CTOC blocks: %d'%self.num_of_ctoc_blocks) |
3366 | + a('Unknown2: %r (%d bytes) (All zeros: %r)'%(self.unknown2, |
3367 | + len(self.unknown2), not bool(self.unknown2.replace(b'\0', '')) )) |
3368 | + a('TAGX offset: %d'%self.tagx_offset) |
3369 | + a('Unknown3: %r (%d bytes) (All zeros: %r)'%(self.unknown3, |
3370 | + len(self.unknown3), not bool(self.unknown3.replace(b'\0', '')) )) |
3371 | + a('\n\n') |
3372 | + a('*'*20 + ' TAGX Header (%d bytes)'%self.tagx_header_length+ '*'*20) |
3373 | + a('Header length: %d'%self.tagx_header_length) |
3374 | + a('Control byte count: %d'%self.tagx_control_byte_count) |
3375 | + for i in self.tagx_entries: |
3376 | + a('\t' + repr(i)) |
3377 | + a('Number of entries in the NCX: %d'% self.ncx_count) |
3378 | + |
3379 | + return '\n'.join(ans) |
3380 | + # }}} |
3381 | + |
3382 | +class MOBIFile(object): # {{{ |
3383 | |
3384 | def __init__(self, stream): |
3385 | self.raw = stream.read() |
3386 | @@ -384,25 +516,40 @@ |
3387 | |
3388 | self.mobi_header = MOBIHeader(self.records[0]) |
3389 | |
3390 | - |
3391 | - def print_header(self): |
3392 | - print (str(self.palmdb).encode('utf-8')) |
3393 | - print () |
3394 | - print ('Record headers:') |
3395 | + self.primary_index_record = None |
3396 | + pir = self.mobi_header.primary_index_record |
3397 | + if pir != 0xffffffff: |
3398 | + self.primary_index_record = PrimaryIndexRecord(self.records[pir]) |
3399 | + |
3400 | + |
3401 | + def print_header(self, f=sys.stdout): |
3402 | + print (str(self.palmdb).encode('utf-8'), file=f) |
3403 | + print (file=f) |
3404 | + print ('Record headers:', file=f) |
3405 | for i, r in enumerate(self.records): |
3406 | - print ('%6d. %s'%(i, r.header)) |
3407 | + print ('%6d. %s'%(i, r.header), file=f) |
3408 | |
3409 | - print () |
3410 | - print (str(self.mobi_header).encode('utf-8')) |
3411 | + print (file=f) |
3412 | + print (str(self.mobi_header).encode('utf-8'), file=f) |
3413 | +# }}} |
3414 | |
3415 | def inspect_mobi(path_or_stream): |
3416 | stream = (path_or_stream if hasattr(path_or_stream, 'read') else |
3417 | open(path_or_stream, 'rb')) |
3418 | f = MOBIFile(stream) |
3419 | - f.print_header() |
3420 | + ddir = 'debug_' + os.path.splitext(os.path.basename(stream.name))[0] |
3421 | + if not os.path.exists(ddir): |
3422 | + os.mkdir(ddir) |
3423 | + with open(os.path.join(ddir, 'header.txt'), 'wb') as out: |
3424 | + f.print_header(f=out) |
3425 | + if f.primary_index_record is not None: |
3426 | + with open(os.path.join(ddir, 'primary_index_record.txt'), 'wb') as out: |
3427 | + print(str(f.primary_index_record), file=out) |
3428 | + print ('Debug data saved to:', ddir) |
3429 | + |
3430 | +def main(): |
3431 | + inspect_mobi(sys.argv[1]) |
3432 | |
3433 | if __name__ == '__main__': |
3434 | - import sys |
3435 | - f = MOBIFile(open(sys.argv[1], 'rb')) |
3436 | - f.print_header() |
3437 | + main() |
3438 | |
3439 | |
3440 | === modified file 'src/calibre/ebooks/mobi/output.py' |
3441 | --- src/calibre/ebooks/mobi/output.py 2010-12-12 19:09:43 +0000 |
3442 | +++ src/calibre/ebooks/mobi/output.py 2011-07-19 06:18:03 +0000 |
3443 | @@ -27,7 +27,7 @@ |
3444 | ), |
3445 | OptionRecommendation(name='no_inline_toc', |
3446 | recommended_value=False, level=OptionRecommendation.LOW, |
3447 | - help=_('Don\'t add Table of Contents to end of book. Useful if ' |
3448 | + help=_('Don\'t add Table of Contents to the book. Useful if ' |
3449 | 'the book has its own table of contents.')), |
3450 | OptionRecommendation(name='toc_title', recommended_value=None, |
3451 | help=_('Title for any generated in-line table of contents.') |
3452 | @@ -45,6 +45,12 @@ |
3453 | 'the MOBI output plugin will try to convert margins specified' |
3454 | ' in the input document, otherwise it will ignore them.') |
3455 | ), |
3456 | + OptionRecommendation(name='mobi_toc_at_start', |
3457 | + recommended_value=False, |
3458 | + help=_('When adding the Table of Contents to the book, add it at the start of the ' |
3459 | + 'book instead of the end. Not recommended.') |
3460 | + ), |
3461 | + |
3462 | ]) |
3463 | |
3464 | def check_for_periodical(self): |
3465 | @@ -150,7 +156,7 @@ |
3466 | # Fix up the periodical href to point to first section href |
3467 | toc.nodes[0].href = toc.nodes[0].nodes[0].href |
3468 | |
3469 | - # GR diagnostics |
3470 | + # diagnostics |
3471 | if self.opts.verbose > 3: |
3472 | self.dump_toc(toc) |
3473 | self.dump_manifest() |
3474 | @@ -158,16 +164,14 @@ |
3475 | |
3476 | def convert(self, oeb, output_path, input_plugin, opts, log): |
3477 | self.log, self.opts, self.oeb = log, opts, oeb |
3478 | - from calibre.ebooks.mobi.writer import PALM_MAX_IMAGE_SIZE, \ |
3479 | - MobiWriter, PALMDOC, UNCOMPRESSED |
3480 | from calibre.ebooks.mobi.mobiml import MobiMLizer |
3481 | from calibre.ebooks.oeb.transforms.manglecase import CaseMangler |
3482 | from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer, Unavailable |
3483 | from calibre.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder |
3484 | from calibre.customize.ui import plugin_for_input_format |
3485 | - imagemax = PALM_MAX_IMAGE_SIZE if opts.rescale_images else None |
3486 | if not opts.no_inline_toc: |
3487 | - tocadder = HTMLTOCAdder(title=opts.toc_title) |
3488 | + tocadder = HTMLTOCAdder(title=opts.toc_title, position='start' if |
3489 | + opts.mobi_toc_at_start else 'end') |
3490 | tocadder(oeb, opts) |
3491 | mangler = CaseMangler() |
3492 | mangler(oeb, opts) |
3493 | @@ -179,10 +183,14 @@ |
3494 | mobimlizer = MobiMLizer(ignore_tables=opts.linearize_tables) |
3495 | mobimlizer(oeb, opts) |
3496 | self.check_for_periodical() |
3497 | - write_page_breaks_after_item = not input_plugin is plugin_for_input_format('cbz') |
3498 | - writer = MobiWriter(opts, imagemax=imagemax, |
3499 | - compression=UNCOMPRESSED if opts.dont_compress else PALMDOC, |
3500 | - prefer_author_sort=opts.prefer_author_sort, |
3501 | - write_page_breaks_after_item=write_page_breaks_after_item) |
3502 | + write_page_breaks_after_item = input_plugin is not plugin_for_input_format('cbz') |
3503 | + from calibre.utils.config import tweaks |
3504 | + if tweaks.get('new_mobi_writer', False): |
3505 | + from calibre.ebooks.mobi.writer2.main import MobiWriter |
3506 | + MobiWriter |
3507 | + else: |
3508 | + from calibre.ebooks.mobi.writer import MobiWriter |
3509 | + writer = MobiWriter(opts, |
3510 | + write_page_breaks_after_item=write_page_breaks_after_item) |
3511 | writer(oeb, output_path) |
3512 | |
3513 | |
3514 | === modified file 'src/calibre/ebooks/mobi/writer.py' |
3515 | --- src/calibre/ebooks/mobi/writer.py 2011-06-15 16:12:34 +0000 |
3516 | +++ src/calibre/ebooks/mobi/writer.py 2011-07-19 06:18:03 +0000 |
3517 | @@ -111,7 +111,8 @@ |
3518 | |
3519 | def rescale_image(data, maxsizeb, dimen=None): |
3520 | if dimen is not None: |
3521 | - data = thumbnail(data, width=dimen, height=dimen)[-1] |
3522 | + data = thumbnail(data, width=dimen[0], height=dimen[1], |
3523 | + compression_quality=90)[-1] |
3524 | else: |
3525 | # Replace transparent pixels with white pixels and convert to JPEG |
3526 | data = save_cover_data_to(data, 'img.jpg', return_data=True) |
3527 | @@ -141,7 +142,7 @@ |
3528 | scale -= 0.05 |
3529 | return data |
3530 | |
3531 | -class Serializer(object): |
3532 | +class Serializer(object): # {{{ |
3533 | NSRMAP = {'': None, XML_NS: 'xml', XHTML_NS: '', MBP_NS: 'mbp'} |
3534 | |
3535 | def __init__(self, oeb, images, write_page_breaks_after_item=True): |
3536 | @@ -172,6 +173,9 @@ |
3537 | hrefs = self.oeb.manifest.hrefs |
3538 | buffer.write('<guide>') |
3539 | for ref in self.oeb.guide.values(): |
3540 | + # The Kindle decides where to open a book based on the presence of |
3541 | + # an item in the guide that looks like |
3542 | + # <reference type="text" title="Start" href="chapter-one.xhtml"/> |
3543 | path = urldefrag(ref.href)[0] |
3544 | if path not in hrefs or hrefs[path].media_type not in OEB_DOCS: |
3545 | continue |
3546 | @@ -215,12 +219,6 @@ |
3547 | self.anchor_offset = buffer.tell() |
3548 | buffer.write('<body>') |
3549 | self.anchor_offset_kindle = buffer.tell() |
3550 | - # CybookG3 'Start Reading' link |
3551 | - if 'text' in self.oeb.guide: |
3552 | - href = self.oeb.guide['text'].href |
3553 | - buffer.write('<a ') |
3554 | - self.serialize_href(href) |
3555 | - buffer.write(' />') |
3556 | spine = [item for item in self.oeb.spine if item.linear] |
3557 | spine.extend([item for item in self.oeb.spine if not item.linear]) |
3558 | for item in spine: |
3559 | @@ -315,16 +313,20 @@ |
3560 | buffer.seek(hoff) |
3561 | buffer.write('%010d' % ioff) |
3562 | |
3563 | + # }}} |
3564 | + |
3565 | class MobiWriter(object): |
3566 | COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+') |
3567 | |
3568 | - def __init__(self, opts, compression=PALMDOC, imagemax=None, |
3569 | - prefer_author_sort=False, write_page_breaks_after_item=True): |
3570 | + def __init__(self, opts, |
3571 | + write_page_breaks_after_item=True): |
3572 | self.opts = opts |
3573 | self.write_page_breaks_after_item = write_page_breaks_after_item |
3574 | - self._compression = compression or UNCOMPRESSED |
3575 | - self._imagemax = imagemax or OTHER_MAX_IMAGE_SIZE |
3576 | - self._prefer_author_sort = prefer_author_sort |
3577 | + self._compression = UNCOMPRESSED if getattr(opts, 'dont_compress', |
3578 | + False) else PALMDOC |
3579 | + self._imagemax = (PALM_MAX_IMAGE_SIZE if getattr(opts, |
3580 | + 'rescale_images', False) else OTHER_MAX_IMAGE_SIZE) |
3581 | + self._prefer_author_sort = getattr(opts, 'prefer_author_sort', False) |
3582 | self._primary_index_record = None |
3583 | self._conforming_periodical_toc = False |
3584 | self._indexable = False |
3585 | @@ -1325,6 +1327,8 @@ |
3586 | except: |
3587 | self._oeb.logger.warn('Bad image file %r' % item.href) |
3588 | continue |
3589 | + finally: |
3590 | + item.unload_data_from_memory() |
3591 | self._records.append(data) |
3592 | if self._first_image_record is None: |
3593 | self._first_image_record = len(self._records)-1 |
3594 | |
3595 | === added directory 'src/calibre/ebooks/mobi/writer2' |
3596 | === added file 'src/calibre/ebooks/mobi/writer2/__init__.py' |
3597 | --- src/calibre/ebooks/mobi/writer2/__init__.py 1970-01-01 00:00:00 +0000 |
3598 | +++ src/calibre/ebooks/mobi/writer2/__init__.py 2011-07-19 06:18:03 +0000 |
3599 | @@ -0,0 +1,15 @@ |
3600 | +#!/usr/bin/env python |
3601 | +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai |
3602 | +from __future__ import (unicode_literals, division, absolute_import, |
3603 | + print_function) |
3604 | + |
3605 | +__license__ = 'GPL v3' |
3606 | +__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>' |
3607 | +__docformat__ = 'restructuredtext en' |
3608 | + |
3609 | + |
3610 | +UNCOMPRESSED = 1 |
3611 | +PALMDOC = 2 |
3612 | +HUFFDIC = 17480 |
3613 | +PALM_MAX_IMAGE_SIZE = 63 * 1024 |
3614 | + |
3615 | |
3616 | === added file 'src/calibre/ebooks/mobi/writer2/main.py' |
3617 | --- src/calibre/ebooks/mobi/writer2/main.py 1970-01-01 00:00:00 +0000 |
3618 | +++ src/calibre/ebooks/mobi/writer2/main.py 2011-07-19 06:18:03 +0000 |
3619 | @@ -0,0 +1,579 @@ |
3620 | +#!/usr/bin/env python |
3621 | +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai |
3622 | +from __future__ import (unicode_literals, division, absolute_import, |
3623 | + print_function) |
3624 | + |
3625 | +__license__ = 'GPL v3' |
3626 | +__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>' |
3627 | +__docformat__ = 'restructuredtext en' |
3628 | + |
3629 | +import re, random, time |
3630 | +from cStringIO import StringIO |
3631 | +from struct import pack |
3632 | + |
3633 | +from calibre.ebooks import normalize |
3634 | +from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES |
3635 | +from calibre.ebooks.mobi.writer2.serializer import Serializer |
3636 | +from calibre.ebooks.compression.palmdoc import compress_doc |
3637 | +from calibre.utils.magick.draw import Image, save_cover_data_to, thumbnail |
3638 | +from calibre.ebooks.mobi.langcodes import iana2mobi |
3639 | +from calibre.utils.filenames import ascii_filename |
3640 | +from calibre.ebooks.mobi.writer2 import PALMDOC, UNCOMPRESSED |
3641 | + |
3642 | +EXTH_CODES = { |
3643 | + 'creator': 100, |
3644 | + 'publisher': 101, |
3645 | + 'description': 103, |
3646 | + 'identifier': 104, |
3647 | + 'subject': 105, |
3648 | + 'pubdate': 106, |
3649 | + 'date': 106, |
3650 | + 'review': 107, |
3651 | + 'contributor': 108, |
3652 | + 'rights': 109, |
3653 | + 'type': 111, |
3654 | + 'source': 112, |
3655 | + 'title': 503, |
3656 | + } |
3657 | + |
3658 | +# Disabled as I don't care about uncrossable breaks |
3659 | +WRITE_UNCROSSABLE_BREAKS = False |
3660 | + |
3661 | +RECORD_SIZE = 0x1000 # 4096 |
3662 | + |
3663 | +IMAGE_MAX_SIZE = 10 * 1024 * 1024 |
3664 | +MAX_THUMB_SIZE = 16 * 1024 |
3665 | +MAX_THUMB_DIMEN = (180, 240) |
3666 | + |
3667 | +# Almost like the one for MS LIT, but not quite. |
3668 | +DECINT_FORWARD = 0 |
3669 | +DECINT_BACKWARD = 1 |
3670 | + |
3671 | +def decint(value, direction): |
3672 | + ''' |
3673 | + Some parts of the Mobipocket format encode data as variable-width integers. |
3674 | + These integers are represented big-endian with 7 bits per byte in bits 1-7. |
3675 | + They may be either forward-encoded, in which case only the LSB has bit 8 set, |
3676 | + or backward-encoded, in which case only the MSB has bit 8 set. |
3677 | + For example, the number 0x11111 would be represented forward-encoded as: |
3678 | + |
3679 | + 0x04 0x22 0x91 |
3680 | + |
3681 | + And backward-encoded as: |
3682 | + |
3683 | + 0x84 0x22 0x11 |
3684 | + |
3685 | + This function encodes the integer ``value`` as a variable width integer and |
3686 | + returns the bytestring corresponding to it. |
3687 | + ''' |
3688 | + # Encode vwi |
3689 | + byts = bytearray() |
3690 | + while True: |
3691 | + b = value & 0x7f |
3692 | + value >>= 7 |
3693 | + byts.append(b) |
3694 | + if value == 0: |
3695 | + break |
3696 | + if direction == DECINT_FORWARD: |
3697 | + byts[0] |= 0x80 |
3698 | + elif direction == DECINT_BACKWARD: |
3699 | + byts[-1] |= 0x80 |
3700 | + return bytes(byts) |
3701 | + |
3702 | +def rescale_image(data, maxsizeb=IMAGE_MAX_SIZE, dimen=None): |
3703 | + ''' |
3704 | + Convert image setting all transparent pixels to white and changing format |
3705 | + to JPEG. Ensure the resultant image has a byte size less than |
3706 | + maxsizeb. |
3707 | + |
3708 | + If dimen is not None, generate a thumbnail of width=dimen, height=dimen |
3709 | + |
3710 | + Returns the image as a bytestring |
3711 | + ''' |
3712 | + if dimen is not None: |
3713 | + data = thumbnail(data, width=dimen, height=dimen, |
3714 | + compression_quality=90)[-1] |
3715 | + else: |
3716 | + # Replace transparent pixels with white pixels and convert to JPEG |
3717 | + data = save_cover_data_to(data, 'img.jpg', return_data=True) |
3718 | + if len(data) <= maxsizeb: |
3719 | + return data |
3720 | + orig_data = data |
3721 | + img = Image() |
3722 | + quality = 95 |
3723 | + |
3724 | + img.load(data) |
3725 | + while len(data) >= maxsizeb and quality >= 10: |
3726 | + quality -= 5 |
3727 | + img.set_compression_quality(quality) |
3728 | + data = img.export('jpg') |
3729 | + if len(data) <= maxsizeb: |
3730 | + return data |
3731 | + orig_data = data |
3732 | + |
3733 | + scale = 0.9 |
3734 | + while len(data) >= maxsizeb and scale >= 0.05: |
3735 | + img = Image() |
3736 | + img.load(orig_data) |
3737 | + w, h = img.size |
3738 | + img.size = (int(scale*w), int(scale*h)) |
3739 | + img.set_compression_quality(quality) |
3740 | + data = img.export('jpg') |
3741 | + scale -= 0.05 |
3742 | + return data |
3743 | + |
3744 | +class MobiWriter(object): |
3745 | + COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+') |
3746 | + |
3747 | + def __init__(self, opts, write_page_breaks_after_item=True): |
3748 | + self.opts = opts |
3749 | + self.write_page_breaks_after_item = write_page_breaks_after_item |
3750 | + self.compression = UNCOMPRESSED if opts.dont_compress else PALMDOC |
3751 | + self.prefer_author_sort = opts.prefer_author_sort |
3752 | + |
3753 | + def __call__(self, oeb, path_or_stream): |
3754 | + if hasattr(path_or_stream, 'write'): |
3755 | + return self.dump_stream(oeb, path_or_stream) |
3756 | + with open(path_or_stream, 'w+b') as stream: |
3757 | + return self.dump_stream(oeb, stream) |
3758 | + |
3759 | + def write(self, *args): |
3760 | + for datum in args: |
3761 | + self.stream.write(datum) |
3762 | + |
3763 | + def tell(self): |
3764 | + return self.stream.tell() |
3765 | + |
3766 | + def dump_stream(self, oeb, stream): |
3767 | + self.oeb = oeb |
3768 | + self.stream = stream |
3769 | + self.records = [None] |
3770 | + self.generate_content() |
3771 | + self.generate_record0() |
3772 | + self.write_header() |
3773 | + self.write_content() |
3774 | + |
3775 | + def generate_content(self): |
3776 | + self.map_image_names() |
3777 | + self.generate_text() |
3778 | + # Image records come after text records |
3779 | + self.generate_images() |
3780 | + |
3781 | + def map_image_names(self): |
3782 | + ''' |
3783 | + Map image names to record indices, ensuring that the masthead image if |
3784 | + present has index number 1. |
3785 | + ''' |
3786 | + index = 1 |
3787 | + self.images = images = {} |
3788 | + mh_href = None |
3789 | + |
3790 | + if 'masthead' in self.oeb.guide: |
3791 | + mh_href = self.oeb.guide['masthead'].href |
3792 | + images[mh_href] = 1 |
3793 | + index += 1 |
3794 | + |
3795 | + for item in self.oeb.manifest.values(): |
3796 | + if item.media_type in OEB_RASTER_IMAGES: |
3797 | + if item.href == mh_href: continue |
3798 | + images[item.href] = index |
3799 | + index += 1 |
3800 | + |
3801 | + def generate_images(self): |
3802 | + self.oeb.logger.info('Serializing images...') |
3803 | + images = [(index, href) for href, index in self.images.iteritems()] |
3804 | + images.sort() |
3805 | + self.first_image_record = None |
3806 | + for _, href in images: |
3807 | + item = self.oeb.manifest.hrefs[href] |
3808 | + try: |
3809 | + data = rescale_image(item.data) |
3810 | + except: |
3811 | + self.oeb.logger.warn('Bad image file %r' % item.href) |
3812 | + continue |
3813 | + finally: |
3814 | + item.unload_data_from_memory() |
3815 | + self.records.append(data) |
3816 | + if self.first_image_record is None: |
3817 | + self.first_image_record = len(self.records) - 1 |
3818 | + |
3819 | + def generate_text(self): |
3820 | + self.oeb.logger.info('Serializing markup content...') |
3821 | + serializer = Serializer(self.oeb, self.images, |
3822 | + write_page_breaks_after_item=self.write_page_breaks_after_item) |
3823 | + text = serializer() |
3824 | + breaks = serializer.breaks |
3825 | + self.anchor_offset_kindle = serializer.anchor_offset_kindle |
3826 | + self.id_offsets = serializer.id_offsets |
3827 | + self.content_length = len(text) |
3828 | + self.text_length = len(text) |
3829 | + text = StringIO(text) |
3830 | + buf = [] |
3831 | + nrecords = 0 |
3832 | + offset = 0 |
3833 | + |
3834 | + if self.compression != UNCOMPRESSED: |
3835 | + self.oeb.logger.info(' Compressing markup content...') |
3836 | + data, overlap = self.read_text_record(text) |
3837 | + |
3838 | + while len(data) > 0: |
3839 | + if self.compression == PALMDOC: |
3840 | + data = compress_doc(data) |
3841 | + record = StringIO() |
3842 | + record.write(data) |
3843 | + |
3844 | + self.records.append(record.getvalue()) |
3845 | + buf.append(self.records[-1]) |
3846 | + nrecords += 1 |
3847 | + offset += RECORD_SIZE |
3848 | + data, overlap = self.read_text_record(text) |
3849 | + |
3850 | + # Write information about the multibyte character overlap, if any |
3851 | + record.write(overlap) |
3852 | + record.write(pack(b'>B', len(overlap))) |
3853 | + |
3854 | + # Write information about uncrossable breaks (non linear items in |
3855 | + # the spine) |
3856 | + if WRITE_UNCROSSABLE_BREAKS: |
3857 | + nextra = 0 |
3858 | + pbreak = 0 |
3859 | + running = offset |
3860 | + |
3861 | + # Write information about every uncrossable break that occurs in |
3862 | + # the next record. |
3863 | + while breaks and (breaks[0] - offset) < RECORD_SIZE: |
3864 | + pbreak = (breaks.pop(0) - running) >> 3 |
3865 | + encoded = decint(pbreak, DECINT_FORWARD) |
3866 | + record.write(encoded) |
3867 | + running += pbreak << 3 |
3868 | + nextra += len(encoded) |
3869 | + lsize = 1 |
3870 | + while True: |
3871 | + size = decint(nextra + lsize, DECINT_BACKWARD) |
3872 | + if len(size) == lsize: |
3873 | + break |
3874 | + lsize += 1 |
3875 | + record.write(size) |
3876 | + |
3877 | + self.text_nrecords = nrecords + 1 |
3878 | + |
3879 | + def read_text_record(self, text): |
3880 | + ''' |
3881 | + Return a Palmdoc record of size RECORD_SIZE from the text file object. |
3882 | + In case the record ends in the middle of a multibyte character return |
3883 | + the overlap as well. |
3884 | + |
3885 | + Returns data, overlap: where both are byte strings. overlap is the |
3886 | + extra bytes needed to complete the truncated multibyte character. |
3887 | + ''' |
3888 | + opos = text.tell() |
3889 | + text.seek(0, 2) |
3890 | + # npos is the position of the next record |
3891 | + npos = min((opos + RECORD_SIZE, text.tell())) |
3892 | + # Number of bytes from the next record needed to complete the last |
3893 | + # character in this record |
3894 | + extra = 0 |
3895 | + |
3896 | + last = b'' |
3897 | + while not last.decode('utf-8', 'ignore'): |
3898 | + # last contains no valid utf-8 characters |
3899 | + size = len(last) + 1 |
3900 | + text.seek(npos - size) |
3901 | + last = text.read(size) |
3902 | + |
3903 | + # last now has one valid utf-8 char and possibly some bytes that belong |
3904 | + # to a truncated char |
3905 | + |
3906 | + try: |
3907 | + last.decode('utf-8', 'strict') |
3908 | + except UnicodeDecodeError: |
3909 | + # There are some truncated bytes in last |
3910 | + prev = len(last) |
3911 | + while True: |
3912 | + text.seek(npos - prev) |
3913 | + last = text.read(len(last) + 1) |
3914 | + try: |
3915 | + last.decode('utf-8') |
3916 | + except UnicodeDecodeError: |
3917 | + pass |
3918 | + else: |
3919 | + break |
3920 | + extra = len(last) - prev |
3921 | + |
3922 | + text.seek(opos) |
3923 | + data = text.read(RECORD_SIZE) |
3924 | + overlap = text.read(extra) |
3925 | + text.seek(npos) |
3926 | + |
3927 | + return data, overlap |
3928 | + |
3929 | + def generate_end_records(self): |
3930 | + self.flis_number = len(self.records) |
3931 | + self.records.append('\xE9\x8E\x0D\x0A') |
3932 | + |
3933 | + def generate_record0(self): # {{{ |
3934 | + metadata = self.oeb.metadata |
3935 | + exth = self.build_exth() |
3936 | + last_content_record = len(self.records) - 1 |
3937 | + |
3938 | + self.generate_end_records() |
3939 | + |
3940 | + record0 = StringIO() |
3941 | + # The PalmDOC Header |
3942 | + record0.write(pack(b'>HHIHHHH', self.compression, 0, |
3943 | + self.text_length, |
3944 | + self.text_nrecords-1, RECORD_SIZE, 0, 0)) # 0 - 15 (0x0 - 0xf) |
3945 | + uid = random.randint(0, 0xffffffff) |
3946 | + title = normalize(unicode(metadata.title[0])).encode('utf-8') |
3947 | + # The MOBI Header |
3948 | + |
3949 | + # 0x0 - 0x3 |
3950 | + record0.write(b'MOBI') |
3951 | + |
3952 | + # 0x4 - 0x7 : Length of header |
3953 | + # 0x8 - 0x11 : MOBI type |
3954 | + # type meaning |
3955 | + # 0x002 MOBI book (chapter - chapter navigation) |
3956 | + # 0x101 News - Hierarchical navigation with sections and articles |
3957 | + # 0x102 News feed - Flat navigation |
3958 | + # 0x103 News magazine - same as 0x101 |
3959 | + # 0xC - 0xF : Text encoding (65001 is utf-8) |
3960 | + # 0x10 - 0x13 : UID |
3961 | + # 0x14 - 0x17 : Generator version |
3962 | + |
3963 | + record0.write(pack(b'>IIIII', |
3964 | + 0xe8, 0x002, 65001, uid, 6)) |
3965 | + |
3966 | + # 0x18 - 0x1f : Unknown |
3967 | + record0.write(b'\xff' * 8) |
3968 | + |
3969 | + |
3970 | + # 0x20 - 0x23 : Secondary index record |
3971 | + record0.write(pack(b'>I', 0xffffffff)) |
3972 | + |
3973 | + # 0x24 - 0x3f : Unknown |
3974 | + record0.write(b'\xff' * 28) |
3975 | + |
3976 | + # 0x40 - 0x43 : Offset of first non-text record |
3977 | + record0.write(pack(b'>I', |
3978 | + self.text_nrecords + 1)) |
3979 | + |
3980 | + # 0x44 - 0x4b : title offset, title length |
3981 | + record0.write(pack(b'>II', |
3982 | + 0xe8 + 16 + len(exth), len(title))) |
3983 | + |
3984 | + # 0x4c - 0x4f : Language specifier |
3985 | + record0.write(iana2mobi( |
3986 | + str(metadata.language[0]))) |
3987 | + |
3988 | + # 0x50 - 0x57 : Unknown |
3989 | + record0.write(b'\0' * 8) |
3990 | + |
3991 | + # 0x58 - 0x5b : Format version |
3992 | + # 0x5c - 0x5f : First image record number |
3993 | + record0.write(pack(b'>II', |
3994 | + 6, self.first_image_record if self.first_image_record else 0)) |
3995 | + |
3996 | + # 0x60 - 0x63 : First HUFF/CDIC record number |
3997 | + # 0x64 - 0x67 : Number of HUFF/CDIC records |
3998 | + # 0x68 - 0x6b : First DATP record number |
3999 | + # 0x6c - 0x6f : Number of DATP records |
4000 | + record0.write(b'\0' * 16) |
4001 | + |
4002 | + # 0x70 - 0x73 : EXTH flags |
4003 | + record0.write(pack(b'>I', 0x50)) |
4004 | + |
4005 | + # 0x74 - 0x93 : Unknown |
4006 | + record0.write(b'\0' * 32) |
4007 | + |
4008 | + # 0x94 - 0x97 : DRM offset |
4009 | + # 0x98 - 0x9b : DRM count |
4010 | + # 0x9c - 0x9f : DRM size |
4011 | + # 0xa0 - 0xa3 : DRM flags |
4012 | + record0.write(pack(b'>IIII', |
4013 | + 0xffffffff, 0xffffffff, 0, 0)) |
4014 | + |
4015 | + |
4016 | + # 0xa4 - 0xaf : Unknown |
4017 | + record0.write(b'\0'*12) |
4018 | + |
4019 | + # 0xb0 - 0xb1 : First content record number |
4020 | + # 0xb2 - 0xb3 : last content record number |
4021 | + # (Includes Image, DATP, HUFF, DRM) |
4022 | + record0.write(pack(b'>HH', 1, last_content_record)) |
4023 | + |
4024 | + # 0xb4 - 0xb7 : Unknown |
4025 | + record0.write(b'\0\0\0\x01') |
4026 | + |
4027 | + # 0xb8 - 0xbb : FCIS record number |
4028 | + record0.write(pack(b'>I', 0xffffffff)) |
4029 | + |
4030 | + # 0xbc - 0xbf : Unknown (FCIS record count?) |
4031 | + record0.write(pack(b'>I', 0xffffffff)) |
4032 | + |
4033 | + # 0xc0 - 0xc3 : FLIS record number |
4034 | + record0.write(pack(b'>I', 0xffffffff)) |
4035 | + |
4036 | + # 0xc4 - 0xc7 : Unknown (FLIS record count?) |
4037 | + record0.write(pack(b'>I', 1)) |
4038 | + |
4039 | + # 0xc8 - 0xcf : Unknown |
4040 | + record0.write(b'\0'*8) |
4041 | + |
4042 | + # 0xd0 - 0xdf : Unknown |
4043 | + record0.write(pack(b'>IIII', 0xffffffff, 0, 0xffffffff, 0xffffffff)) |
4044 | + |
4045 | + # 0xe0 - 0xe3 : Extra record data |
4046 | + # Extra record data flags: |
4047 | + # - 0x1: <extra multibyte bytes><size> (?) |
4048 | + # - 0x2: <TBS indexing description of this HTML record><size> GR |
4049 | + # - 0x4: <uncrossable breaks><size> |
4050 | + # GR: Use 7 for indexed files, 5 for unindexed |
4051 | + # Setting bit 2 (0x2) disables <guide><reference type="start"> functionality |
4052 | + |
4053 | + extra_data_flags = 0b1 # Has multibyte overlap bytes |
4054 | + if WRITE_UNCROSSABLE_BREAKS: |
4055 | + extra_data_flags |= 0b100 |
4056 | + record0.write(pack(b'>I', extra_data_flags)) |
4057 | + |
4058 | + # 0xe4 - 0xe7 : Primary index record |
4059 | + record0.write(pack(b'>I', 0xffffffff)) |
4060 | + |
4061 | + record0.write(exth) |
4062 | + record0.write(title) |
4063 | + record0 = record0.getvalue() |
4064 | + # Add some buffer so that Amazon can add encryption information if this |
4065 | + # MOBI is submitted for publication |
4066 | + record0 += (b'\0' * (1024*8)) |
4067 | + self.records[0] = record0 |
4068 | + # }}} |
4069 | + |
4070 | + def build_exth(self): # {{{ |
4071 | + oeb = self.oeb |
4072 | + exth = StringIO() |
4073 | + nrecs = 0 |
4074 | + for term in oeb.metadata: |
4075 | + if term not in EXTH_CODES: continue |
4076 | + code = EXTH_CODES[term] |
4077 | + items = oeb.metadata[term] |
4078 | + if term == 'creator': |
4079 | + if self.prefer_author_sort: |
4080 | + creators = [normalize(unicode(c.file_as or c)) for c in items] |
4081 | + else: |
4082 | + creators = [normalize(unicode(c)) for c in items] |
4083 | + items = ['; '.join(creators)] |
4084 | + for item in items: |
4085 | + data = self.COLLAPSE_RE.sub(' ', normalize(unicode(item))) |
4086 | + if term == 'identifier': |
4087 | + if data.lower().startswith('urn:isbn:'): |
4088 | + data = data[9:] |
4089 | + elif item.scheme.lower() == 'isbn': |
4090 | + pass |
4091 | + else: |
4092 | + continue |
4093 | + data = data.encode('utf-8') |
4094 | + exth.write(pack(b'>II', code, len(data) + 8)) |
4095 | + exth.write(data) |
4096 | + nrecs += 1 |
4097 | + if term == 'rights' : |
4098 | + try: |
4099 | + rights = normalize(unicode(oeb.metadata.rights[0])).encode('utf-8') |
4100 | + except: |
4101 | + rights = b'Unknown' |
4102 | + exth.write(pack(b'>II', EXTH_CODES['rights'], len(rights) + 8)) |
4103 | + exth.write(rights) |
4104 | + nrecs += 1 |
4105 | + |
4106 | + # Write UUID as ASIN |
4107 | + uuid = None |
4108 | + from calibre.ebooks.oeb.base import OPF |
4109 | + for x in oeb.metadata['identifier']: |
4110 | + if (x.get(OPF('scheme'), None).lower() == 'uuid' or |
4111 | + unicode(x).startswith('urn:uuid:')): |
4112 | + uuid = unicode(x).split(':')[-1] |
4113 | + break |
4114 | + if uuid is None: |
4115 | + from uuid import uuid4 |
4116 | + uuid = str(uuid4()) |
4117 | + |
4118 | + if isinstance(uuid, unicode): |
4119 | + uuid = uuid.encode('utf-8') |
4120 | + exth.write(pack(b'>II', 113, len(uuid) + 8)) |
4121 | + exth.write(uuid) |
4122 | + nrecs += 1 |
4123 | + |
4124 | + # Write cdetype |
4125 | + if not self.opts.mobi_periodical: |
4126 | + data = b'EBOK' |
4127 | + exth.write(pack(b'>II', 501, len(data)+8)) |
4128 | + exth.write(data) |
4129 | + nrecs += 1 |
4130 | + |
4131 | + # Add a publication date entry |
4132 | + if oeb.metadata['date'] != [] : |
4133 | + datestr = str(oeb.metadata['date'][0]) |
4134 | + elif oeb.metadata['timestamp'] != [] : |
4135 | + datestr = str(oeb.metadata['timestamp'][0]) |
4136 | + |
4137 | + if datestr is not None: |
4138 | + exth.write(pack(b'>II', EXTH_CODES['pubdate'], len(datestr) + 8)) |
4139 | + exth.write(datestr) |
4140 | + nrecs += 1 |
4141 | + else: |
4142 | + raise NotImplementedError("missing date or timestamp needed for mobi_periodical") |
4143 | + |
4144 | + if (oeb.metadata.cover and |
4145 | + unicode(oeb.metadata.cover[0]) in oeb.manifest.ids): |
4146 | + id = unicode(oeb.metadata.cover[0]) |
4147 | + item = oeb.manifest.ids[id] |
4148 | + href = item.href |
4149 | + if href in self.images: |
4150 | + index = self.images[href] - 1 |
4151 | + exth.write(pack(b'>III', 0xc9, 0x0c, index)) |
4152 | + exth.write(pack(b'>III', 0xcb, 0x0c, 0)) |
4153 | + nrecs += 2 |
4154 | + index = self.add_thumbnail(item) |
4155 | + if index is not None: |
4156 | + exth.write(pack(b'>III', 0xca, 0x0c, index - 1)) |
4157 | + nrecs += 1 |
4158 | + |
4159 | + exth = exth.getvalue() |
4160 | + trail = len(exth) % 4 |
4161 | + pad = b'\0' * (4 - trail) # Always pad w/ at least 1 byte |
4162 | + exth = [b'EXTH', pack(b'>II', len(exth) + 12, nrecs), exth, pad] |
4163 | + return b''.join(exth) |
4164 | + # }}} |
4165 | + |
4166 | + def add_thumbnail(self, item): |
4167 | + try: |
4168 | + data = rescale_image(item.data, dimen=MAX_THUMB_DIMEN, |
4169 | + maxsizeb=MAX_THUMB_SIZE) |
4170 | + except IOError: |
4171 | + self.oeb.logger.warn('Bad image file %r' % item.href) |
4172 | + return None |
4173 | + manifest = self.oeb.manifest |
4174 | + id, href = manifest.generate('thumbnail', 'thumbnail.jpeg') |
4175 | + manifest.add(id, href, 'image/jpeg', data=data) |
4176 | + index = len(self.images) + 1 |
4177 | + self.images[href] = index |
4178 | + self.records.append(data) |
4179 | + return index |
4180 | + |
4181 | + def write_header(self): |
4182 | + title = ascii_filename(unicode(self.oeb.metadata.title[0])) |
4183 | + title = title + (b'\0' * (32 - len(title))) |
4184 | + now = int(time.time()) |
4185 | + nrecords = len(self.records) |
4186 | + self.write(title, pack(b'>HHIIIIII', 0, 0, now, now, 0, 0, 0, 0), |
4187 | + b'BOOK', b'MOBI', pack(b'>IIH', nrecords, 0, nrecords)) |
4188 | + offset = self.tell() + (8 * nrecords) + 2 |
4189 | + for i, record in enumerate(self.records): |
4190 | + self.write(pack(b'>I', offset), b'\0', pack(b'>I', 2*i)[1:]) |
4191 | + offset += len(record) |
4192 | + self.write(b'\0\0') |
4193 | + |
4194 | + def write_content(self): |
4195 | + for record in self.records: |
4196 | + self.write(record) |
4197 | + |
4198 | + |
4199 | |
4200 | === added file 'src/calibre/ebooks/mobi/writer2/serializer.py' |
4201 | --- src/calibre/ebooks/mobi/writer2/serializer.py 1970-01-01 00:00:00 +0000 |
4202 | +++ src/calibre/ebooks/mobi/writer2/serializer.py 2011-07-19 06:18:03 +0000 |
4203 | @@ -0,0 +1,246 @@ |
4204 | +#!/usr/bin/env python |
4205 | +# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai |
4206 | +from __future__ import (unicode_literals, division, absolute_import, |
4207 | + print_function) |
4208 | + |
4209 | +__license__ = 'GPL v3' |
4210 | +__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>' |
4211 | +__docformat__ = 'restructuredtext en' |
4212 | + |
4213 | +from calibre.ebooks.oeb.base import (OEB_DOCS, XHTML, XHTML_NS, XML_NS, |
4214 | + namespace, prefixname, urlnormalize) |
4215 | +from calibre.ebooks.mobi.mobiml import MBP_NS |
4216 | + |
4217 | +from collections import defaultdict |
4218 | +from urlparse import urldefrag |
4219 | +from cStringIO import StringIO |
4220 | + |
4221 | + |
4222 | +class Serializer(object): |
4223 | + NSRMAP = {'': None, XML_NS: 'xml', XHTML_NS: '', MBP_NS: 'mbp'} |
4224 | + |
4225 | + def __init__(self, oeb, images, write_page_breaks_after_item=True): |
4226 | + ''' |
4227 | + Write all the HTML markup in oeb into a single in memory buffer |
4228 | + containing a single html document with links replaced by offsets into |
4229 | + the buffer. |
4230 | + |
4231 | + :param oeb: OEBBook object that encapsulates the document to be |
4232 | + processed. |
4233 | + |
4234 | + :param images: Mapping of image hrefs (urlnormalized) to image record |
4235 | + indices. |
4236 | + |
4237 | + :param write_page_breaks_after_item: If True a MOBIpocket pagebreak tag |
4238 | + is written after every element of the spine in ``oeb``. |
4239 | + ''' |
4240 | + self.oeb = oeb |
4241 | + self.images = images |
4242 | + self.logger = oeb.logger |
4243 | + self.write_page_breaks_after_item = write_page_breaks_after_item |
4244 | + |
4245 | + # Mapping of hrefs (urlnormalized) to the offset in the buffer where |
4246 | + # the resource pointed to by the href lives. Used at the end to fill in |
4247 | + # the correct values into all filepos="..." links. |
4248 | + self.id_offsets = {} |
4249 | + |
4250 | + # Mapping of hrefs (urlnormalized) to a list of offsets into the buffer |
4251 | + # where filepos="..." elements are written corresponding to links that |
4252 | + # point to the href. This is used at the end to fill in the correct values. |
4253 | + self.href_offsets = defaultdict(list) |
4254 | + |
4255 | + # List of offsets in the buffer of non linear items in the spine. These |
4256 | + # become uncrossable breaks in the MOBI |
4257 | + self.breaks = [] |
4258 | + |
4259 | + def __call__(self): |
4260 | + ''' |
4261 | + Return the document serialized as a single UTF-8 encoded bytestring. |
4262 | + ''' |
4263 | + buf = self.buf = StringIO() |
4264 | + buf.write(b'<html>') |
4265 | + self.serialize_head() |
4266 | + self.serialize_body() |
4267 | + buf.write(b'</html>') |
4268 | + self.fixup_links() |
4269 | + return buf.getvalue() |
4270 | + |
4271 | + def serialize_head(self): |
4272 | + buf = self.buf |
4273 | + buf.write(b'<head>') |
4274 | + if len(self.oeb.guide) > 0: |
4275 | + self.serialize_guide() |
4276 | + buf.write(b'</head>') |
4277 | + |
4278 | + def serialize_guide(self): |
4279 | + ''' |
4280 | + The Kindle decides where to open a book based on the presence of |
4281 | + an item in the guide that looks like |
4282 | + <reference type="text" title="Start" href="chapter-one.xhtml"/> |
4283 | + |
4284 | + Similarly an item with type="toc" controls where the Goto Table of |
4285 | + Contents operation on the kindle goes. |
4286 | + ''' |
4287 | + |
4288 | + buf = self.buf |
4289 | + hrefs = self.oeb.manifest.hrefs |
4290 | + buf.write(b'<guide>') |
4291 | + for ref in self.oeb.guide.values(): |
4292 | + path = urldefrag(ref.href)[0] |
4293 | + if path not in hrefs or hrefs[path].media_type not in OEB_DOCS: |
4294 | + continue |
4295 | + |
4296 | + buf.write(b'<reference type="') |
4297 | + if ref.type.startswith('other.') : |
4298 | + self.serialize_text(ref.type.replace('other.',''), quot=True) |
4299 | + else: |
4300 | + self.serialize_text(ref.type, quot=True) |
4301 | + buf.write(b'" ') |
4302 | + if ref.title is not None: |
4303 | + buf.write(b'title="') |
4304 | + self.serialize_text(ref.title, quot=True) |
4305 | + buf.write(b'" ') |
4306 | + self.serialize_href(ref.href) |
4307 | + # Space required or won't work, I kid you not |
4308 | + buf.write(b' />') |
4309 | + |
4310 | + buf.write(b'</guide>') |
4311 | + |
4312 | + def serialize_href(self, href, base=None): |
4313 | + ''' |
4314 | + Serialize the href attribute of an <a> or <reference> tag. It is |
4315 | + serialized as filepos="000000000" and a pointer to its location is |
4316 | + stored in self.href_offsets so that the correct value can be filled in |
4317 | + at the end. |
4318 | + ''' |
4319 | + hrefs = self.oeb.manifest.hrefs |
4320 | + path, frag = urldefrag(urlnormalize(href)) |
4321 | + if path and base: |
4322 | + path = base.abshref(path) |
4323 | + if path and path not in hrefs: |
4324 | + return False |
4325 | + buf = self.buf |
4326 | + item = hrefs[path] if path else None |
4327 | + if item and item.spine_position is None: |
4328 | + return False |
4329 | + path = item.href if item else base.href |
4330 | + href = '#'.join((path, frag)) if frag else path |
4331 | + buf.write(b'filepos=') |
4332 | + self.href_offsets[href].append(buf.tell()) |
4333 | + buf.write(b'0000000000') |
4334 | + return True |
4335 | + |
4336 | + def serialize_body(self): |
4337 | + ''' |
4338 | + Serialize all items in the spine of the document. Non linear items are |
4339 | + moved to the end. |
4340 | + ''' |
4341 | + buf = self.buf |
4342 | + self.anchor_offset = buf.tell() |
4343 | + buf.write(b'<body>') |
4344 | + self.anchor_offset_kindle = buf.tell() |
4345 | + spine = [item for item in self.oeb.spine if item.linear] |
4346 | + spine.extend([item for item in self.oeb.spine if not item.linear]) |
4347 | + for item in spine: |
4348 | + self.serialize_item(item) |
4349 | + buf.write(b'</body>') |
4350 | + |
4351 | + def serialize_item(self, item): |
4352 | + ''' |
4353 | + Serialize an individual item from the spine of the input document. |
4354 | + A reference to this item is stored in self.href_offsets |
4355 | + ''' |
4356 | + buf = self.buf |
4357 | + if not item.linear: |
4358 | + self.breaks.append(buf.tell() - 1) |
4359 | + self.id_offsets[urlnormalize(item.href)] = buf.tell() |
4360 | + # Kindle periodical articles are contained in a <div> tag |
4361 | + buf.write(b'<div>') |
4362 | + for elem in item.data.find(XHTML('body')): |
4363 | + self.serialize_elem(elem, item) |
4364 | + # Kindle periodical article end marker |
4365 | + buf.write(b'<div></div>') |
4366 | + if self.write_page_breaks_after_item: |
4367 | + buf.write(b'<mbp:pagebreak/>') |
4368 | + buf.write(b'</div>') |
4369 | + self.anchor_offset = None |
4370 | + |
4371 | + def serialize_elem(self, elem, item, nsrmap=NSRMAP): |
4372 | + buf = self.buf |
4373 | + if not isinstance(elem.tag, basestring) \ |
4374 | + or namespace(elem.tag) not in nsrmap: |
4375 | + return |
4376 | + tag = prefixname(elem.tag, nsrmap) |
4377 | + # Previous layers take care of @name |
4378 | + id_ = elem.attrib.pop('id', None) |
4379 | + if id_: |
4380 | + href = '#'.join((item.href, id_)) |
4381 | + offset = self.anchor_offset or buf.tell() |
4382 | + self.id_offsets[urlnormalize(href)] = offset |
4383 | + if self.anchor_offset is not None and \ |
4384 | + tag == 'a' and not elem.attrib and \ |
4385 | + not len(elem) and not elem.text: |
4386 | + return |
4387 | + self.anchor_offset = buf.tell() |
4388 | + buf.write(b'<') |
4389 | + buf.write(tag.encode('utf-8')) |
4390 | + if elem.attrib: |
4391 | + for attr, val in elem.attrib.items(): |
4392 | + if namespace(attr) not in nsrmap: |
4393 | + continue |
4394 | + attr = prefixname(attr, nsrmap) |
4395 | + buf.write(b' ') |
4396 | + if attr == 'href': |
4397 | + if self.serialize_href(val, item): |
4398 | + continue |
4399 | + elif attr == 'src': |
4400 | + href = urlnormalize(item.abshref(val)) |
4401 | + if href in self.images: |
4402 | + index = self.images[href] |
4403 | + buf.write(b'recindex="%05d"' % index) |
4404 | + continue |
4405 | + buf.write(attr.encode('utf-8')) |
4406 | + buf.write(b'="') |
4407 | + self.serialize_text(val, quot=True) |
4408 | + buf.write(b'"') |
4409 | + buf.write(b'>') |
4410 | + if elem.text or len(elem) > 0: |
4411 | + if elem.text: |
4412 | + self.anchor_offset = None |
4413 | + self.serialize_text(elem.text) |
4414 | + for child in elem: |
4415 | + self.serialize_elem(child, item) |
4416 | + if child.tail: |
4417 | + self.anchor_offset = None |
4418 | + self.serialize_text(child.tail) |
4419 | + buf.write(b'</%s>' % tag.encode('utf-8')) |
4420 | + |
4421 | + def serialize_text(self, text, quot=False): |
4422 | + text = text.replace('&', '&amp;') |
4423 | + text = text.replace('<', '&lt;') |
4424 | + text = text.replace('>', '&gt;') |
4425 | + text = text.replace(u'\u00AD', '') # Soft-hyphen |
4426 | + if quot: |
4427 | + text = text.replace('"', '&quot;') |
4428 | + self.buf.write(text.encode('utf-8')) |
4429 | + |
4430 | + def fixup_links(self): |
4431 | + ''' |
4432 | + Fill in the correct values for all filepos="..." links with the offsets |
4433 | + of the linked to content (as stored in id_offsets). |
4434 | + ''' |
4435 | + buf = self.buf |
4436 | + id_offsets = self.id_offsets |
4437 | + for href, hoffs in self.href_offsets.items(): |
4438 | + # Iterate over all filepos items |
4439 | + if href not in id_offsets: |
4440 | + self.logger.warn('Hyperlink target %r not found' % href) |
4441 | + # Link to the top of the document, better than just ignoring |
4442 | + href, _ = urldefrag(href) |
4443 | + if href in self.id_offsets: |
4444 | + ioff = self.id_offsets[href] |
4445 | + for hoff in hoffs: |
4446 | + buf.seek(hoff) |
4447 | + buf.write(b'%010d' % ioff) |
4448 | + |
4449 | + |
4450 | |
4451 | === modified file 'src/calibre/ebooks/oeb/base.py' |
4452 | --- src/calibre/ebooks/oeb/base.py 2011-06-18 16:00:57 +0000 |
4453 | +++ src/calibre/ebooks/oeb/base.py 2011-07-19 06:18:03 +0000 |
4454 | @@ -1180,8 +1180,9 @@ |
4455 | if memory is None: |
4456 | from calibre.ptempfile import PersistentTemporaryFile |
4457 | pt = PersistentTemporaryFile(suffix='_oeb_base_mem_unloader.img') |
4458 | - pt.write(self._data) |
4459 | - pt.close() |
4460 | + with pt: |
4461 | + pt.write(self._data) |
4462 | + self.oeb._temp_files.append(pt.name) |
4463 | def loader(*args): |
4464 | with open(pt.name, 'rb') as f: |
4465 | ans = f.read() |
4466 | @@ -1196,8 +1197,6 @@ |
4467 | self._loader = loader2 |
4468 | self._data = None |
4469 | |
4470 | - |
4471 | - |
4472 | def __str__(self): |
4473 | data = self.data |
4474 | if isinstance(data, etree._Element): |
4475 | @@ -1913,6 +1912,14 @@ |
4476 | self.toc = TOC() |
4477 | self.pages = PageList() |
4478 | self.auto_generated_toc = True |
4479 | + self._temp_files = [] |
4480 | + |
4481 | + def clean_temp_files(self): |
4482 | + for path in self._temp_files: |
4483 | + try: |
4484 | + os.remove(path) |
4485 | + except: |
4486 | + pass |
4487 | |
4488 | @classmethod |
4489 | def generate(cls, opts): |
4490 | |
4491 | === modified file 'src/calibre/ebooks/oeb/iterator.py' |
4492 | --- src/calibre/ebooks/oeb/iterator.py 2011-06-02 18:42:59 +0000 |
4493 | +++ src/calibre/ebooks/oeb/iterator.py 2011-07-19 06:18:03 +0000 |
4494 | @@ -92,7 +92,7 @@ |
4495 | self.config = DynamicConfig(name='iterator') |
4496 | ext = os.path.splitext(pathtoebook)[1].replace('.', '').lower() |
4497 | ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext) |
4498 | - self.ebook_ext = ext |
4499 | + self.ebook_ext = ext.replace('original_', '') |
4500 | |
4501 | def search(self, text, index, backwards=False): |
4502 | text = text.lower() |
4503 | |
4504 | === modified file 'src/calibre/ebooks/oeb/transforms/flatcss.py' |
4505 | --- src/calibre/ebooks/oeb/transforms/flatcss.py 2011-02-24 16:18:10 +0000 |
4506 | +++ src/calibre/ebooks/oeb/transforms/flatcss.py 2011-07-19 06:18:03 +0000 |
4507 | @@ -318,7 +318,8 @@ |
4508 | for edge in ('top', 'bottom'): |
4509 | cssdict['%s-%s'%(prop, edge)] = '0pt' |
4510 | if self.context.insert_blank_line: |
4511 | - cssdict['margin-top'] = cssdict['margin-bottom'] = '0.5em' |
4512 | + cssdict['margin-top'] = cssdict['margin-bottom'] = \ |
4513 | + '%fem'%self.context.insert_blank_line_size |
4514 | if self.context.remove_paragraph_spacing: |
4515 | cssdict['text-indent'] = "%1.1fem" % self.context.remove_paragraph_spacing_indent_size |
4516 | |
4517 | |
4518 | === modified file 'src/calibre/ebooks/oeb/transforms/guide.py' |
4519 | --- src/calibre/ebooks/oeb/transforms/guide.py 2010-01-29 01:32:52 +0000 |
4520 | +++ src/calibre/ebooks/oeb/transforms/guide.py 2011-07-19 06:18:03 +0000 |
4521 | @@ -36,5 +36,8 @@ |
4522 | href = urldefrag(self.oeb.guide[x].href)[0] |
4523 | if x.lower() not in ('cover', 'titlepage', 'masthead', 'toc', |
4524 | 'title-page', 'copyright-page', 'start'): |
4525 | + item = self.oeb.guide[x] |
4526 | + if item.title and item.title.lower() == 'start': |
4527 | + continue |
4528 | self.oeb.guide.remove(x) |
4529 | |
4530 | |
4531 | === modified file 'src/calibre/ebooks/oeb/transforms/htmltoc.py' |
4532 | --- src/calibre/ebooks/oeb/transforms/htmltoc.py 2010-09-02 13:48:45 +0000 |
4533 | +++ src/calibre/ebooks/oeb/transforms/htmltoc.py 2011-07-19 06:18:03 +0000 |
4534 | @@ -45,9 +45,10 @@ |
4535 | } |
4536 | |
4537 | class HTMLTOCAdder(object): |
4538 | - def __init__(self, title=None, style='nested'): |
4539 | + def __init__(self, title=None, style='nested', position='end'): |
4540 | self.title = title |
4541 | self.style = style |
4542 | + self.position = position |
4543 | |
4544 | @classmethod |
4545 | def config(cls, cfg): |
4546 | @@ -98,7 +99,10 @@ |
4547 | self.add_toc_level(body, oeb.toc) |
4548 | id, href = oeb.manifest.generate('contents', 'contents.xhtml') |
4549 | item = oeb.manifest.add(id, href, XHTML_MIME, data=contents) |
4550 | - oeb.spine.add(item, linear=False) |
4551 | + if self.position == 'end': |
4552 | + oeb.spine.add(item, linear=False) |
4553 | + else: |
4554 | + oeb.spine.insert(0, item, linear=True) |
4555 | oeb.guide.add('toc', 'Table of Contents', href) |
4556 | |
4557 | def add_toc_level(self, elem, toc): |
4558 | |
4559 | === modified file 'src/calibre/gui2/__init__.py' |
4560 | --- src/calibre/gui2/__init__.py 2011-07-10 00:34:12 +0000 |
4561 | +++ src/calibre/gui2/__init__.py 2011-07-19 06:18:03 +0000 |
4562 | @@ -15,7 +15,6 @@ |
4563 | from calibre.constants import (islinux, iswindows, isbsd, isfrozen, isosx, |
4564 | config_dir) |
4565 | from calibre.utils.config import Config, ConfigProxy, dynamic, JSONConfig |
4566 | -from calibre.utils.localization import set_qt_translator |
4567 | from calibre.ebooks.metadata import MetaInformation |
4568 | from calibre.utils.date import UNDEFINED_DATE |
4569 | |
4570 | @@ -631,6 +630,22 @@ |
4571 | nw = min(self.width(), nw) |
4572 | self.resize(nw, nh) |
4573 | |
4574 | +class Translator(QTranslator): |
4575 | + ''' |
4576 | + Translator to load translations for strings in Qt from the calibre |
4577 | + translations. Does not support advanced features of Qt like disambiguation |
4578 | + and plural forms. |
4579 | + ''' |
4580 | + |
4581 | + def translate(self, *args, **kwargs): |
4582 | + try: |
4583 | + src = unicode(args[1]) |
4584 | + except: |
4585 | + return u'' |
4586 | + t = _ |
4587 | + return t(src) |
4588 | + |
4589 | + |
4590 | gui_thread = None |
4591 | |
4592 | qt_app = None |
4593 | @@ -677,9 +692,8 @@ |
4594 | def load_translations(self): |
4595 | if self._translator is not None: |
4596 | self.removeTranslator(self._translator) |
4597 | - self._translator = QTranslator(self) |
4598 | - if set_qt_translator(self._translator): |
4599 | - self.installTranslator(self._translator) |
4600 | + self._translator = Translator(self) |
4601 | + self.installTranslator(self._translator) |
4602 | |
4603 | def event(self, e): |
4604 | if callable(self.file_event_hook) and e.type() == QEvent.FileOpen: |
4605 | |
4606 | === modified file 'src/calibre/gui2/actions/convert.py' |
4607 | --- src/calibre/gui2/actions/convert.py 2011-06-02 01:28:36 +0000 |
4608 | +++ src/calibre/gui2/actions/convert.py 2011-07-19 06:18:03 +0000 |
4609 | @@ -12,7 +12,7 @@ |
4610 | |
4611 | from calibre.gui2 import error_dialog, Dispatcher |
4612 | from calibre.gui2.tools import convert_single_ebook, convert_bulk_ebook |
4613 | -from calibre.utils.config import prefs |
4614 | +from calibre.utils.config import prefs, tweaks |
4615 | from calibre.gui2.actions import InterfaceAction |
4616 | from calibre.customize.ui import plugin_for_input_format |
4617 | |
4618 | @@ -118,6 +118,8 @@ |
4619 | def queue_convert_jobs(self, jobs, changed, bad, rows, previous, |
4620 | converted_func, extra_job_args=[]): |
4621 | for func, args, desc, fmt, id, temp_files in jobs: |
4622 | + func, _, same_fmt = func.partition(':') |
4623 | + same_fmt = same_fmt == 'same_fmt' |
4624 | input_file = args[0] |
4625 | input_fmt = os.path.splitext(input_file)[1] |
4626 | core_usage = 1 |
4627 | @@ -131,6 +133,7 @@ |
4628 | job = self.gui.job_manager.run_job(Dispatcher(converted_func), |
4629 | func, args=args, description=desc, |
4630 | core_usage=core_usage) |
4631 | + job.conversion_of_same_fmt = same_fmt |
4632 | args = [temp_files, fmt, id]+extra_job_args |
4633 | self.conversion_jobs[job] = tuple(args) |
4634 | |
4635 | @@ -166,14 +169,18 @@ |
4636 | if job.failed: |
4637 | self.gui.job_exception(job) |
4638 | return |
4639 | + same_fmt = getattr(job, 'conversion_of_same_fmt', False) |
4640 | fmtf = temp_files[-1].name |
4641 | if os.stat(fmtf).st_size < 1: |
4642 | raise Exception(_('Empty output file, ' |
4643 | 'probably the conversion process crashed')) |
4644 | |
4645 | + db = self.gui.current_db |
4646 | + if same_fmt and tweaks['save_original_format']: |
4647 | + db.save_original_format(book_id, fmt, notify=False) |
4648 | + |
4649 | with open(temp_files[-1].name, 'rb') as data: |
4650 | - self.gui.library_view.model().db.add_format(book_id, \ |
4651 | - fmt, data, index_is_id=True) |
4652 | + db.add_format(book_id, fmt, data, index_is_id=True) |
4653 | self.gui.status_bar.show_message(job.description + \ |
4654 | (' completed'), 2000) |
4655 | finally: |
4656 | |
4657 | === modified file 'src/calibre/gui2/actions/delete.py' |
4658 | --- src/calibre/gui2/actions/delete.py 2011-06-04 20:00:28 +0000 |
4659 | +++ src/calibre/gui2/actions/delete.py 2011-07-19 06:18:03 +0000 |
4660 | @@ -81,7 +81,7 @@ |
4661 | class DeleteAction(InterfaceAction): |
4662 | |
4663 | name = 'Remove Books' |
4664 | - action_spec = (_('Remove books'), 'trash.png', None, _('Del')) |
4665 | + action_spec = (_('Remove books'), 'trash.png', None, 'Del') |
4666 | action_type = 'current' |
4667 | |
4668 | def genesis(self): |
4669 | |
4670 | === modified file 'src/calibre/gui2/actions/view.py' |
4671 | --- src/calibre/gui2/actions/view.py 2011-05-01 16:33:10 +0000 |
4672 | +++ src/calibre/gui2/actions/view.py 2011-07-19 06:18:03 +0000 |
4673 | @@ -128,7 +128,8 @@ |
4674 | self.gui.unsetCursor() |
4675 | |
4676 | def _view_file(self, name): |
4677 | - ext = os.path.splitext(name)[1].upper().replace('.', '') |
4678 | + ext = os.path.splitext(name)[1].upper().replace('.', |
4679 | + '').replace('ORIGINAL_', '') |
4680 | viewer = 'lrfviewer' if ext == 'LRF' else 'ebook-viewer' |
4681 | internal = ext in config['internally_viewed_formats'] |
4682 | self._launch_viewer(name, viewer, internal) |
4683 | |
4684 | === modified file 'src/calibre/gui2/convert/look_and_feel.py' |
4685 | --- src/calibre/gui2/convert/look_and_feel.py 2011-01-06 19:46:34 +0000 |
4686 | +++ src/calibre/gui2/convert/look_and_feel.py 2011-07-19 06:18:03 +0000 |
4687 | @@ -24,7 +24,10 @@ |
4688 | 'font_size_mapping', 'line_height', 'minimum_line_height', |
4689 | 'linearize_tables', 'smarten_punctuation', |
4690 | 'disable_font_rescaling', 'insert_blank_line', |
4691 | - 'remove_paragraph_spacing', 'remove_paragraph_spacing_indent_size','input_encoding', |
4692 | + 'remove_paragraph_spacing', |
4693 | + 'remove_paragraph_spacing_indent_size', |
4694 | + 'insert_blank_line_size', |
4695 | + 'input_encoding', |
4696 | 'asciiize', 'keep_ligatures'] |
4697 | ) |
4698 | for val, text in [ |
4699 | |
4700 | === modified file 'src/calibre/gui2/convert/look_and_feel.ui' |
4701 | --- src/calibre/gui2/convert/look_and_feel.ui 2011-01-25 21:40:18 +0000 |
4702 | +++ src/calibre/gui2/convert/look_and_feel.ui 2011-07-19 06:18:03 +0000 |
4703 | @@ -6,7 +6,7 @@ |
4704 | <rect> |
4705 | <x>0</x> |
4706 | <y>0</y> |
4707 | - <width>600</width> |
4708 | + <width>642</width> |
4709 | <height>500</height> |
4710 | </rect> |
4711 | </property> |
4712 | @@ -31,7 +31,7 @@ |
4713 | </property> |
4714 | </widget> |
4715 | </item> |
4716 | - <item row="1" column="1" colspan="2"> |
4717 | + <item row="1" column="1"> |
4718 | <widget class="QDoubleSpinBox" name="opt_base_font_size"> |
4719 | <property name="suffix"> |
4720 | <string> pt</string> |
4721 | @@ -97,6 +97,29 @@ |
4722 | </item> |
4723 | </layout> |
4724 | </item> |
4725 | + <item row="3" column="0"> |
4726 | + <widget class="QLabel" name="label_6"> |
4727 | + <property name="text"> |
4728 | + <string>Minimum &amp;line height:</string> |
4729 | + </property> |
4730 | + <property name="buddy"> |
4731 | + <cstring>opt_minimum_line_height</cstring> |
4732 | + </property> |
4733 | + </widget> |
4734 | + </item> |
4735 | + <item row="3" column="1"> |
4736 | + <widget class="QDoubleSpinBox" name="opt_minimum_line_height"> |
4737 | + <property name="suffix"> |
4738 | + <string> %</string> |
4739 | + </property> |
4740 | + <property name="decimals"> |
4741 | + <number>1</number> |
4742 | + </property> |
4743 | + <property name="maximum"> |
4744 | + <double>900.000000000000000</double> |
4745 | + </property> |
4746 | + </widget> |
4747 | + </item> |
4748 | <item row="4" column="0"> |
4749 | <widget class="QLabel" name="label"> |
4750 | <property name="text"> |
4751 | @@ -107,7 +130,7 @@ |
4752 | </property> |
4753 | </widget> |
4754 | </item> |
4755 | - <item row="4" column="1" colspan="2"> |
4756 | + <item row="4" column="1"> |
4757 | <widget class="QDoubleSpinBox" name="opt_line_height"> |
4758 | <property name="suffix"> |
4759 | <string> pt</string> |
4760 | @@ -127,6 +150,13 @@ |
4761 | </property> |
4762 | </widget> |
4763 | </item> |
4764 | + <item row="5" column="1" colspan="2"> |
4765 | + <widget class="EncodingComboBox" name="opt_input_encoding"> |
4766 | + <property name="editable"> |
4767 | + <bool>true</bool> |
4768 | + </property> |
4769 | + </widget> |
4770 | + </item> |
4771 | <item row="6" column="0" colspan="2"> |
4772 | <widget class="QCheckBox" name="opt_remove_paragraph_spacing"> |
4773 | <property name="text"> |
4774 | @@ -134,48 +164,58 @@ |
4775 | </property> |
4776 | </widget> |
4777 | </item> |
4778 | - <item row="6" column="2" colspan="2"> |
4779 | - <layout class="QHBoxLayout" name="horizontalLayout_2"> |
4780 | - <item> |
4781 | - <widget class="QLabel" name="label_4"> |
4782 | - <property name="text"> |
4783 | - <string>Indent size:</string> |
4784 | - </property> |
4785 | - <property name="alignment"> |
4786 | - <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set> |
4787 | - </property> |
4788 | - </widget> |
4789 | - </item> |
4790 | - <item> |
4791 | - <widget class="QDoubleSpinBox" name="opt_remove_paragraph_spacing_indent_size"> |
4792 | - <property name="toolTip"> |
4793 | - <string>&lt;p&gt;When calibre removes inter paragraph spacing, it automatically sets a paragraph indent, to ensure that paragraphs can be easily distinguished. This option controls the width of that indent.</string> |
4794 | - </property> |
4795 | - <property name="suffix"> |
4796 | - <string> em</string> |
4797 | - </property> |
4798 | - <property name="decimals"> |
4799 | - <number>1</number> |
4800 | - </property> |
4801 | - </widget> |
4802 | - </item> |
4803 | - </layout> |
4804 | - </item> |
4805 | - <item row="7" column="0"> |
4806 | + <item row="7" column="0" colspan="2"> |
4807 | + <widget class="QCheckBox" name="opt_insert_blank_line"> |
4808 | + <property name="text"> |
4809 | + <string>Insert &amp;blank line between paragraphs</string> |
4810 | + </property> |
4811 | + </widget> |
4812 | + </item> |
4813 | + <item row="7" column="4"> |
4814 | + <widget class="QDoubleSpinBox" name="opt_insert_blank_line_size"> |
4815 | + <property name="suffix"> |
4816 | + <string> em</string> |
4817 | + </property> |
4818 | + <property name="decimals"> |
4819 | + <number>1</number> |
4820 | + </property> |
4821 | + </widget> |
4822 | + </item> |
4823 | + <item row="8" column="0"> |
4824 | <widget class="QLabel" name="label_5"> |
4825 | <property name="text"> |
4826 | - <string>Text justification:</string> |
4827 | + <string>Text &amp;justification:</string> |
4828 | + </property> |
4829 | + <property name="buddy"> |
4830 | + <cstring>opt_change_justification</cstring> |
4831 | </property> |
4832 | </widget> |
4833 | </item> |
4834 | - <item row="8" column="0"> |
4835 | + <item row="8" column="2" colspan="3"> |
4836 | + <widget class="QComboBox" name="opt_change_justification"/> |
4837 | + </item> |
4838 | + <item row="9" column="0"> |
4839 | <widget class="QCheckBox" name="opt_linearize_tables"> |
4840 | <property name="text"> |
4841 | <string>&amp;Linearize tables</string> |
4842 | </property> |
4843 | </widget> |
4844 | </item> |
4845 | - <item row="11" column="0" colspan="4"> |
4846 | + <item row="9" column="1" colspan="4"> |
4847 | + <widget class="QCheckBox" name="opt_asciiize"> |
4848 | + <property name="text"> |
4849 | + <string>&amp;Transliterate unicode characters to ASCII</string> |
4850 | + </property> |
4851 | + </widget> |
4852 | + </item> |
4853 | + <item row="10" column="1" colspan="2"> |
4854 | + <widget class="QCheckBox" name="opt_keep_ligatures"> |
4855 | + <property name="text"> |
4856 | + <string>Keep &amp;ligatures</string> |
4857 | + </property> |
4858 | + </widget> |
4859 | + </item> |
4860 | + <item row="12" column="0" colspan="5"> |
4861 | <widget class="QGroupBox" name="groupBox"> |
4862 | <property name="title"> |
4863 | <string>Extra &amp;CSS</string> |
4864 | @@ -187,27 +227,16 @@ |
4865 | </layout> |
4866 | </widget> |
4867 | </item> |
4868 | - <item row="7" column="2" colspan="2"> |
4869 | - <widget class="QComboBox" name="opt_change_justification"/> |
4870 | - </item> |
4871 | - <item row="8" column="1" colspan="3"> |
4872 | - <widget class="QCheckBox" name="opt_asciiize"> |
4873 | - <property name="text"> |
4874 | - <string>&amp;Transliterate unicode characters to ASCII</string> |
4875 | - </property> |
4876 | - </widget> |
4877 | - </item> |
4878 | - <item row="9" column="0"> |
4879 | - <widget class="QCheckBox" name="opt_insert_blank_line"> |
4880 | - <property name="text"> |
4881 | - <string>Insert &amp;blank line</string> |
4882 | - </property> |
4883 | - </widget> |
4884 | - </item> |
4885 | - <item row="9" column="1" colspan="2"> |
4886 | - <widget class="QCheckBox" name="opt_keep_ligatures"> |
4887 | - <property name="text"> |
4888 | - <string>Keep &amp;ligatures</string> |
4889 | + <item row="6" column="4"> |
4890 | + <widget class="QDoubleSpinBox" name="opt_remove_paragraph_spacing_indent_size"> |
4891 | + <property name="toolTip"> |
4892 | + <string>&lt;p&gt;When calibre removes inter paragraph spacing, it automatically sets a paragraph indent, to ensure that paragraphs can be easily distinguished. This option controls the width of that indent.</string> |
4893 | + </property> |
4894 | + <property name="suffix"> |
4895 | + <string> em</string> |
4896 | + </property> |
4897 | + <property name="decimals"> |
4898 | + <number>1</number> |
4899 | </property> |
4900 | </widget> |
4901 | </item> |
4902 | @@ -218,33 +247,29 @@ |
4903 | </property> |
4904 | </widget> |
4905 | </item> |
4906 | - <item row="3" column="0"> |
4907 | - <widget class="QLabel" name="label_6"> |
4908 | - <property name="text"> |
4909 | - <string>Minimum &amp;line height:</string> |
4910 | - </property> |
4911 | - <property name="buddy"> |
4912 | - <cstring>opt_minimum_line_height</cstring> |
4913 | - </property> |
4914 | - </widget> |
4915 | - </item> |
4916 | - <item row="3" column="1" colspan="2"> |
4917 | - <widget class="QDoubleSpinBox" name="opt_minimum_line_height"> |
4918 | - <property name="suffix"> |
4919 | - <string> %</string> |
4920 | - </property> |
4921 | - <property name="decimals"> |
4922 | - <number>1</number> |
4923 | - </property> |
4924 | - <property name="maximum"> |
4925 | - <double>900.000000000000000</double> |
4926 | - </property> |
4927 | - </widget> |
4928 | - </item> |
4929 | - <item row="5" column="1" colspan="3"> |
4930 | - <widget class="EncodingComboBox" name="opt_input_encoding"> |
4931 | - <property name="editable"> |
4932 | - <bool>true</bool> |
4933 | + <item row="6" column="3"> |
4934 | + <widget class="QLabel" name="label_4"> |
4935 | + <property name="text"> |
4936 | + <string>&amp;Indent size:</string> |
4937 | + </property> |
4938 | + <property name="alignment"> |
4939 | + <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set> |
4940 | + </property> |
4941 | + <property name="buddy"> |
4942 | + <cstring>opt_remove_paragraph_spacing_indent_size</cstring> |
4943 | + </property> |
4944 | + </widget> |
4945 | + </item> |
4946 | + <item row="7" column="3"> |
4947 | + <widget class="QLabel" name="label_7"> |
4948 | + <property name="text"> |
4949 | + <string>&amp;Line size:</string> |
4950 | + </property> |
4951 | + <property name="alignment"> |
4952 | + <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set> |
4953 | + </property> |
4954 | + <property name="buddy"> |
4955 | + <cstring>opt_insert_blank_line_size</cstring> |
4956 | </property> |
4957 | </widget> |
4958 | </item> |
4959 | |
4960 | === modified file 'src/calibre/gui2/convert/mobi_output.py' |
4961 | --- src/calibre/gui2/convert/mobi_output.py 2010-12-20 20:20:21 +0000 |
4962 | +++ src/calibre/gui2/convert/mobi_output.py 2011-07-19 06:18:03 +0000 |
4963 | @@ -24,7 +24,7 @@ |
4964 | def __init__(self, parent, get_option, get_help, db=None, book_id=None): |
4965 | Widget.__init__(self, parent, |
4966 | ['prefer_author_sort', 'rescale_images', 'toc_title', |
4967 | - 'mobi_ignore_margins', |
4968 | + 'mobi_ignore_margins', 'mobi_toc_at_start', |
4969 | 'dont_compress', 'no_inline_toc', 'masthead_font','personal_doc'] |
4970 | ) |
4971 | from calibre.utils.fonts import fontconfig |
4972 | |
4973 | === modified file 'src/calibre/gui2/convert/mobi_output.ui' |
4974 | --- src/calibre/gui2/convert/mobi_output.ui 2010-12-12 19:09:43 +0000 |
4975 | +++ src/calibre/gui2/convert/mobi_output.ui 2011-07-19 06:18:03 +0000 |
4976 | @@ -27,21 +27,21 @@ |
4977 | <item row="1" column="1"> |
4978 | <widget class="QLineEdit" name="opt_toc_title"/> |
4979 | </item> |
4980 | - <item row="2" column="0" colspan="2"> |
4981 | + <item row="4" column="0" colspan="2"> |
4982 | <widget class="QCheckBox" name="opt_rescale_images"> |
4983 | <property name="text"> |
4984 | <string>Rescale images for &amp;Palm devices</string> |
4985 | </property> |
4986 | </widget> |
4987 | </item> |
4988 | - <item row="3" column="0" colspan="2"> |
4989 | + <item row="5" column="0" colspan="2"> |
4990 | <widget class="QCheckBox" name="opt_prefer_author_sort"> |
4991 | <property name="text"> |
4992 | <string>Use author &amp;sort for author</string> |
4993 | </property> |
4994 | </widget> |
4995 | </item> |
4996 | - <item row="4" column="0"> |
4997 | + <item row="6" column="0"> |
4998 | <widget class="QCheckBox" name="opt_dont_compress"> |
4999 | <property name="text"> |
5000 | <string>Disable compression of the file contents</string> |
There are a few issues:
1) Searches for "Червей под есенен вятър" and "alex" produce no results with this plugin. Searches for those queries on the web site produce results.
2) Please use != instead of <> as <> is obsolete.
3) Put the entries in builtins in alphabetical order.