Merge lp:~realender/calibre/calibre into lp:~user-none/calibre/store

Proposed by Alex Stanev
Status: Merged
Merged at revision: 8441
Proposed branch: lp:~realender/calibre/calibre
Merge into: lp:~user-none/calibre/store
Diff against target: 363397 lines (+81812/-43845)
143 files modified
Changelog.yaml (+53/-1)
recipes/freakonomics.recipe (+15/-11)
recipes/nikkei_news.recipe (+88/-0)
recipes/techcrunch.recipe (+63/-0)
recipes/tijolaco.recipe (+24/-0)
recipes/time_magazine.recipe (+58/-69)
recipes/vio_mundo.recipe (+30/-0)
recipes/wired_uk.recipe (+111/-36)
recipes/zeitde.recipe (+6/-5)
recipes/zeitde_sub.recipe (+136/-34)
resources/default_tweaks.py (+7/-0)
resources/templates/fb2.xsl (+6/-2)
session.vim (+2/-2)
setup/check.py (+6/-15)
setup/translations.py (+27/-22)
src/calibre/__init__.py (+2/-2)
src/calibre/constants.py (+1/-1)
src/calibre/customize/builtins.py (+11/-0)
src/calibre/db/backend.py (+89/-6)
src/calibre/db/cache.py (+377/-2)
src/calibre/db/fields.py (+257/-0)
src/calibre/db/locking.py (+153/-6)
src/calibre/db/tables.py (+50/-8)
src/calibre/db/view.py (+109/-0)
src/calibre/devices/android/driver.py (+4/-2)
src/calibre/devices/kobo/driver.py (+175/-170)
src/calibre/devices/usbms/device.py (+7/-2)
src/calibre/devices/usbms/driver.py (+23/-5)
src/calibre/ebooks/__init__.py (+4/-2)
src/calibre/ebooks/chardet/__init__.py (+5/-1)
src/calibre/ebooks/conversion/cli.py (+3/-1)
src/calibre/ebooks/conversion/plumber.py (+11/-3)
src/calibre/ebooks/htmlz/input.py (+40/-6)
src/calibre/ebooks/metadata/book/base.py (+1/-1)
src/calibre/ebooks/mobi/debug.py (+170/-23)
src/calibre/ebooks/mobi/output.py (+19/-11)
src/calibre/ebooks/mobi/writer.py (+17/-13)
src/calibre/ebooks/mobi/writer2/__init__.py (+15/-0)
src/calibre/ebooks/mobi/writer2/main.py (+579/-0)
src/calibre/ebooks/mobi/writer2/serializer.py (+246/-0)
src/calibre/ebooks/oeb/base.py (+11/-4)
src/calibre/ebooks/oeb/iterator.py (+1/-1)
src/calibre/ebooks/oeb/transforms/flatcss.py (+2/-1)
src/calibre/ebooks/oeb/transforms/guide.py (+3/-0)
src/calibre/ebooks/oeb/transforms/htmltoc.py (+6/-2)
src/calibre/gui2/__init__.py (+18/-4)
src/calibre/gui2/actions/convert.py (+10/-3)
src/calibre/gui2/actions/delete.py (+1/-1)
src/calibre/gui2/actions/view.py (+2/-1)
src/calibre/gui2/convert/look_and_feel.py (+4/-1)
src/calibre/gui2/convert/look_and_feel.ui (+107/-82)
src/calibre/gui2/convert/mobi_output.py (+1/-1)
src/calibre/gui2/convert/mobi_output.ui (+13/-6)
src/calibre/gui2/custom_column_widgets.py (+1/-0)
src/calibre/gui2/dialogs/jobs.ui (+1/-4)
src/calibre/gui2/jobs.py (+20/-11)
src/calibre/gui2/library/delegates.py (+28/-3)
src/calibre/gui2/metadata/basic_widgets.py (+68/-8)
src/calibre/gui2/metadata/single.py (+3/-1)
src/calibre/gui2/preferences/look_feel.py (+7/-2)
src/calibre/gui2/preferences/toolbar.ui (+67/-79)
src/calibre/gui2/store/stores/chitanka_plugin.py (+140/-0)
src/calibre/gui2/tools.py (+14/-4)
src/calibre/gui2/viewer/main.py (+7/-6)
src/calibre/gui2/wizard/__init__.py (+10/-5)
src/calibre/library/database2.py (+17/-0)
src/calibre/library/save_to_disk.py (+3/-2)
src/calibre/manual/faq.rst (+21/-3)
src/calibre/manual/gui.rst (+2/-1)
src/calibre/ptempfile.py (+11/-1)
src/calibre/translations/af.po (+1029/-601)
src/calibre/translations/ar.po (+1628/-916)
src/calibre/translations/ast.po (+1029/-601)
src/calibre/translations/az.po (+1029/-601)
src/calibre/translations/bg.po (+1051/-604)
src/calibre/translations/bn.po (+1029/-601)
src/calibre/translations/br.po (+1029/-601)
src/calibre/translations/bs.po (+1030/-601)
src/calibre/translations/ca.po (+1381/-684)
src/calibre/translations/calibre.pot (+276/-162)
src/calibre/translations/cs.po (+1257/-650)
src/calibre/translations/da.po (+1244/-644)
src/calibre/translations/de.po (+1301/-662)
src/calibre/translations/el.po (+1042/-606)
src/calibre/translations/en_AU.po (+1029/-601)
src/calibre/translations/en_CA.po (+1059/-617)
src/calibre/translations/en_GB.po (+1417/-786)
src/calibre/translations/eo.po (+1029/-601)
src/calibre/translations/es.po (+1389/-692)
src/calibre/translations/et.po (+1029/-601)
src/calibre/translations/eu.po (+1248/-656)
src/calibre/translations/fa.po (+1029/-601)
src/calibre/translations/fi.po (+1034/-602)
src/calibre/translations/fo.po (+1029/-601)
src/calibre/translations/fr.po (+1449/-688)
src/calibre/translations/gl.po (+1317/-674)
src/calibre/translations/he.po (+1057/-607)
src/calibre/translations/hi.po (+1029/-601)
src/calibre/translations/hr.po (+1145/-632)
src/calibre/translations/hu.po (+1207/-643)
src/calibre/translations/id.po (+1029/-601)
src/calibre/translations/it.po (+1272/-665)
src/calibre/translations/ja.po (+1407/-679)
src/calibre/translations/ko.po (+1154/-629)
src/calibre/translations/lt.po (+1030/-601)
src/calibre/translations/ltg.po (+1029/-601)
src/calibre/translations/lv.po (+1034/-602)
src/calibre/translations/ml.po (+1029/-601)
src/calibre/translations/mr.po (+1029/-601)
src/calibre/translations/ms.po (+1029/-601)
src/calibre/translations/nb.po (+1256/-657)
src/calibre/translations/nds.po (+1135/-631)
src/calibre/translations/nl.po (+1381/-684)
src/calibre/translations/oc.po (+1029/-601)
src/calibre/translations/pa.po (+1029/-601)
src/calibre/translations/pl.po (+1748/-712)
src/calibre/translations/pt.po (+1213/-647)
src/calibre/translations/pt_BR.po (+1189/-642)
src/calibre/translations/ro.po (+1130/-624)
src/calibre/translations/ru.po (+1187/-636)
src/calibre/translations/sc.po (+1029/-601)
src/calibre/translations/si.po (+1029/-601)
src/calibre/translations/sk.po (+1097/-616)
src/calibre/translations/sl.po (+1162/-628)
src/calibre/translations/sq.po (+1033/-605)
src/calibre/translations/sr.po (+1251/-651)
src/calibre/translations/sv.po (+1336/-677)
src/calibre/translations/ta.po (+1029/-601)
src/calibre/translations/te.po (+1029/-601)
src/calibre/translations/th.po (+1034/-602)
src/calibre/translations/tr.po (+1171/-669)
src/calibre/translations/uk.po (+1118/-619)
src/calibre/translations/ur.po (+1029/-601)
src/calibre/translations/vi.po (+1091/-620)
src/calibre/translations/wa.po (+1029/-601)
src/calibre/translations/yi.po (+1029/-601)
src/calibre/translations/zh_CN.po (+1230/-638)
src/calibre/translations/zh_HK.po (+1033/-605)
src/calibre/translations/zh_TW.po (+1236/-639)
src/calibre/utils/ipc/launch.py (+11/-2)
src/calibre/utils/localization.py (+12/-12)
src/calibre/utils/magick/draw.py (+2/-2)
src/calibre/web/feeds/news.py (+3/-1)
To merge this branch: bzr merge lp:~realender/calibre/calibre
Reviewer Review Type Date Requested Status
Alex Stanev (community) Needs Resubmitting
John Schember Needs Fixing
Review via email: mp+68240@code.launchpad.net

Commit message

chitanka.info search plugin

Description of the change

A chitanka.info store plugin for DRM-free ebooks in Bulgarian, available in FB2, EPUB, TXT and SFB formats.
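
For orientation, the sketch below shows roughly the shape a calibre Get Books store plugin takes. It is not the code added in this branch (that lives in src/calibre/gui2/store/stores/chitanka_plugin.py and is registered via StoreChitankaStore in customize/builtins.py, as the diff below shows); the base classes, the search() signature and the SearchResult fields follow calibre's 0.8.x store API, while the search URL and the XPath expressions are purely illustrative assumptions.

    # Rough sketch only, not the ChitankaStore added in this branch.
    # Base classes and SearchResult usage follow calibre's store plugin API;
    # the search URL and the XPath expressions are illustrative assumptions.
    from urllib import quote
    from contextlib import closing

    from lxml import html

    from calibre import browser
    from calibre.gui2.store import StorePlugin
    from calibre.gui2.store.basic_config import BasicStoreConfig
    from calibre.gui2.store.search_result import SearchResult

    class ChitankaStoreSketch(BasicStoreConfig, StorePlugin):

        def search(self, query, max_results=10, timeout=60):
            # Hypothetical endpoint; the real plugin may use a different URL.
            url = 'http://chitanka.info/search?q=' + quote(query.encode('utf-8'))
            br = browser()
            with closing(br.open(url, timeout=timeout)) as f:
                doc = html.fromstring(f.read())
            # Hypothetical markup; the XPath would follow the site's actual HTML.
            for node in doc.xpath('//ul[@class="books"]/li')[:max_results]:
                s = SearchResult()
                s.title = ''.join(node.xpath('.//a[@class="booklink"]/text()'))
                s.author = ''.join(node.xpath('.//span[@class="author"]/text()'))
                s.detail_item = ''.join(node.xpath('.//a[@class="booklink"]/@href'))
                s.drm = SearchResult.DRM_UNLOCKED  # chitanka books are DRM-free
                s.formats = 'FB2, EPUB, TXT, SFB'
                s.price = 'Free'
                yield s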

John Schember (user-none) wrote:

There are a few issues:

1) Searches for "Червей под есенен вятър" and "alex" produce no results with this plugin. Searches for those queries on the web site produce results.

2) Please use != instead of <> as <> is obsolete (see the short example after this list).

3) Put the entries in builtins in alphabetical order.
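
For reference on point 2, a minimal illustration of the two spellings: <> is an obsolete Python 2 synonym for != and was removed in Python 3, so only the second form should be used (the variable and value here are hypothetical):

    fmt = 'EPUB'
    # Obsolete spelling, removed in Python 3:
    #   if fmt <> 'FB2': ...
    # Preferred, equivalent spelling:
    if fmt != 'FB2':
        print fmt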

review: Needs Fixing
lp:~realender/calibre/calibre updated
9899. By Alex Stanev

Fixed compare and ordering

9900. By Alex Stanev

Fix whitespace

Alex Stanev (realender) wrote:

Hi John,

1) Chitanka has a somewhat complicated structure for its works: it divides them into books, poems, productions, novels and so on. The plugin currently searches only the books type, and "Червей под есенен вятър" is a single novel, so it does not appear in the results. The "alex" search does return works by Alexandre Dumas and others; the plugin returns the right books, but calibre itself applies additional filtering that hides them, because chitanka matches "alex" in English yet returns the author as "Александър Дюма" in Bulgarian. I believe this is acceptable for now (see the short illustration below).
2) and 3) fixed and pushed.
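
To make the point about client-side filtering concrete, here is a rough, simplified stand-in (not calibre's actual Get Books filter code) showing why a Latin-script query can hide results whose metadata comes back in Cyrillic:

    # -*- coding: utf-8 -*-
    # Simplified stand-in for the filtering described above; calibre's real
    # Get Books match filter is more involved, but the effect is the same.
    results = [
        {'title': u'Граф Монте Кристо', 'author': u'Александър Дюма'},
    ]

    def matches(query, result):
        q = query.lower()
        # Naive substring match against the metadata the store returned.
        return q in result['title'].lower() or q in result['author'].lower()

    # The store matched "alex" server-side (against the transliterated author),
    # but the Latin query is not a substring of the Cyrillic metadata, so a
    # client-side filter like this drops every result.
    print [r for r in results if matches(u'alex', r)]  # -> []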

review: Needs Resubmitting

Preview Diff

1=== modified file 'Changelog.yaml'
2--- Changelog.yaml 2011-07-08 17:01:37 +0000
3+++ Changelog.yaml 2011-07-19 06:18:03 +0000
4@@ -19,6 +19,58 @@
5 # new recipes:
6 # - title:
7
8+- version: 0.8.10
9+ date: 2011-07-15
10+
11+ new features:
12+ - title: "Add a right click menu to the cover browser. It allows you to view a book, edit metadata etc. from within the cover browser. The menu can be customized in Preferences->Toolbars"
13+
14+ - title: "Allow selecting and stopping multiple jobs at once in the jobs window"
15+ tickets: [810349]
16+
17+ - title: "When editing metadata directly in the book list, have a little pop up menu so that all existing values can be accessed by mouse only. For example, when you edit authors, you can use the mouse to select an existing author."
18+
19+ - title: "Get Books: Add ebook.nl and fix price parsing for the legimi store"
20+
21+ - title: "Drivers for Samsung Infuse and Motorola XPERT"
22+
23+ - title: "Tag Browser: Make hierarchical items work in group searched terms."
24+
25+ bug fixes:
26+ - title: "Allow setting numbers larger than 99 in custom series columns"
27+
28+ - title: "Fix a bug that caused the same news download sent via a USB connection to the device on two different days resulting in a duplicate on the device"
29+
30+ - title: "Ensure English in the list of interface languages in Preferences is always listed in English, so that it does not become hard to find"
31+
32+ - title: "SNB Output: Fix bug in handling unicode file names"
33+
34+ - title: "Fix sorting problem in manage categories. Fix poor performance problem when dropping multiple books onto a user category."
35+
36+ - title: "Remove 'empty field' error dialogs in bulk search/replace, instead setting the fields to their default value."
37+
38+ - title: "Fix regression that broke communicating with Kobo devices using outdated firmware"
39+ tickets: [807832]
40+
41+ - title: "LRF Input: Fix conversion of LRF files with non ascii titles on some windows systems"
42+ tickets: [807641]
43+
44+ improved recipes:
45+ - Time
46+ - Freakonomics Blog
47+ - io9
48+ - "Computer Act!ve"
49+
50+ new recipes:
51+ - title: Techcrunch and Pecat
52+ author: Darko Miletic
53+
54+ - title: Vio Mundo, IDG Now and Tojolaco
55+ author: Diniz Bortoletto
56+
57+ - title: Geek and Poke, Automatiseringgids IT
58+ author: DrMerry
59+
60 - version: 0.8.9
61 date: 2011-07-08
62
63@@ -32,7 +84,7 @@
64 - title: "Conversion pipeline: Add option to control if duplicate entries are allowed when generating the Table of Contents from links."
65 tickets: [806095]
66
67- - title: "Metadata download: When merging results, if the query to the xisbn service hangs, wait no more than 10 seconds. Also try harder to preserve the month when downlaoding published date. Do not throw away isbnless results if there are some sources that return isbns and some that do not."
68+ - title: "Metadata download: When merging results, if the query to the xisbn service hangs, wait no more than 10 seconds. Also try harder to preserve the month when downloading published date. Do not throw away isbnless results if there are some sources that return isbns and some that do not."
69 tickets: [798309]
70
71 - title: "Get Books: Remove OpenLibrary since it has the same files as archive.org. Allow direct downloading from Project Gutenberg."
72
73=== modified file 'recipes/freakonomics.recipe'
74--- recipes/freakonomics.recipe 2010-05-23 17:29:13 +0000
75+++ recipes/freakonomics.recipe 2011-07-19 06:18:03 +0000
76@@ -1,25 +1,29 @@
77 #!/usr/bin/env python
78 __license__ = 'GPL v3'
79-__copyright__ = '2009, Kovid Goyal kovid@kovidgoyal.net'
80+__copyright__ = '2011, Starson17'
81 __docformat__ = 'restructuredtext en'
82
83 from calibre.web.feeds.news import BasicNewsRecipe
84
85 class Freakonomics(BasicNewsRecipe):
86-
87 title = 'Freakonomics Blog'
88 description = 'The Hidden side of everything'
89- __author__ = 'Starson17'
90+ __author__ = 'Starson17'
91+ __version__ = '1.02'
92+ __date__ = '11 July 2011'
93 language = 'en'
94 cover_url = 'http://ilkerugur.files.wordpress.com/2009/04/freakonomics.jpg'
95-
96- feeds = [('Blog', 'http://feeds.feedburner.com/freakonomicsblog')]
97-
98- keep_only_tags = [dict(name='div', attrs={'id':'header'}),
99- dict(name='h1'),
100- dict(name='h2'),
101- dict(name='div', attrs={'class':'entry-content'}),
102- ]
103+ use_embedded_content= False
104+ no_stylesheets = True
105+ oldest_article = 30
106+ remove_javascript = True
107+ remove_empty_feeds = True
108+ max_articles_per_feed = 50
109+
110+ feeds = [(u'Freakonomics Blog', u'http://www.freakonomics.com/feed/')]
111+ keep_only_tags = [dict(name='div', attrs={'id':['content']})]
112+ remove_tags_after = [dict(name='div', attrs={'class':['simple_socialmedia']})]
113+ remove_tags = [dict(name='div', attrs={'class':['simple_socialmedia','single-fb-share','wp-polls']})]
114 extra_css = '''
115 h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
116 h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
117
118=== added file 'recipes/icons/techcrunch.png'
119Binary files recipes/icons/techcrunch.png 1970-01-01 00:00:00 +0000 and recipes/icons/techcrunch.png 2011-07-19 06:18:03 +0000 differ
120=== added file 'recipes/nikkei_news.recipe'
121--- recipes/nikkei_news.recipe 1970-01-01 00:00:00 +0000
122+++ recipes/nikkei_news.recipe 2011-07-19 06:18:03 +0000
123@@ -0,0 +1,88 @@
124+from calibre.web.feeds.recipes import BasicNewsRecipe
125+import re
126+
127+#import pprint, sys
128+#pp = pprint.PrettyPrinter(indent=4)
129+
130+class NikkeiNet_paper_subscription(BasicNewsRecipe):
131+ title = u'\u65E5\u672C\u7D4C\u6E08\u65B0\u805E\uFF08\u671D\u520A\u30FB\u5915\u520A\uFF09'
132+ __author__ = 'Ado Nishimura'
133+ description = u'\u65E5\u7D4C\u96FB\u5B50\u7248\u306B\u3088\u308B\u65E5\u672C\u7D4C\u6E08\u65B0\u805E\u3002\u671D\u520A\u30FB\u5915\u520A\u306F\u53D6\u5F97\u6642\u9593\u306B\u3088\u308A\u5207\u308A\u66FF\u308F\u308A\u307E\u3059\u3002\u8981\u8CFC\u8AAD'
134+ needs_subscription = True
135+ oldest_article = 1
136+ max_articles_per_feed = 30
137+ language = 'ja'
138+ no_stylesheets = True
139+ cover_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
140+ masthead_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'
141+
142+ remove_tags_before = {'class':"cmn-indent"}
143+ remove_tags = [
144+# {'class':"cmn-article_move"},
145+# {'class':"cmn-pr_list"},
146+# {'class':"cmnc-zoom"},
147+ {'class':"cmn-hide"},
148+ {'name':'form'},
149+ ]
150+ remove_tags_after = {'class':"cmn-indent"}
151+
152+ def get_browser(self):
153+ br = BasicNewsRecipe.get_browser()
154+
155+ #pp.pprint(self.parse_index())
156+ #exit(1)
157+
158+ #br.set_debug_http(True)
159+ #br.set_debug_redirects(True)
160+ #br.set_debug_responses(True)
161+
162+ if self.username is not None and self.password is not None:
163+ print "----------------------------open top page----------------------------------------"
164+ br.open('http://www.nikkei.com/')
165+ print "----------------------------open first login form--------------------------------"
166+ link = br.links(url_regex="www.nikkei.com/etc/accounts/login").next()
167+ br.follow_link(link)
168+ #response = br.response()
169+ #print response.get_data()
170+ print "----------------------------JS redirect(send autoPostForm)-----------------------"
171+ br.select_form(name='autoPostForm')
172+ br.submit()
173+ #response = br.response()
174+ print "----------------------------got login form---------------------------------------"
175+ br.select_form(name='LA0210Form01')
176+ br['LA0210Form01:LA0210Email'] = self.username
177+ br['LA0210Form01:LA0210Password'] = self.password
178+ br.submit()
179+ #response = br.response()
180+ print "----------------------------JS redirect------------------------------------------"
181+ br.select_form(nr=0)
182+ br.submit()
183+
184+ #br.set_debug_http(False)
185+ #br.set_debug_redirects(False)
186+ #br.set_debug_responses(False)
187+ return br
188+
189+ def cleanup(self):
190+ print "----------------------------logout-----------------------------------------------"
191+ self.browser.open('https://regist.nikkei.com/ds/etc/accounts/logout')
192+
193+ def parse_index(self):
194+ print "----------------------------get index of paper-----------------------------------"
195+ result = []
196+ soup = self.index_to_soup('http://www.nikkei.com/paper/')
197+ #soup = self.index_to_soup(self.test_data())
198+ for sect in soup.findAll('div', 'cmn-section kn-special JSID_baseSection'):
199+ sect_title = sect.find('h3', 'cmnc-title').string
200+ sect_result = []
201+ for elem in sect.findAll(attrs={'class':['cmn-article_title']}):
202+ url = 'http://www.nikkei.com' + elem.span.a['href']
203+ url = re.sub("/article/", "/print-article/", url) # print version.
204+ span = elem.span.a.span
205+ if ((span is not None) and (len(span.contents) > 1)):
206+ title = span.contents[1].string
207+ sect_result.append(dict(title=title, url=url, date='',
208+ description='', content=''))
209+ result.append([sect_title, sect_result])
210+ #pp.pprint(result)
211+
212
213=== added file 'recipes/techcrunch.recipe'
214--- recipes/techcrunch.recipe 1970-01-01 00:00:00 +0000
215+++ recipes/techcrunch.recipe 2011-07-19 06:18:03 +0000
216@@ -0,0 +1,63 @@
217+__license__ = 'GPL v3'
218+__copyright__ = '2011, Darko Miletic <darko.miletic at gmail.com>'
219+'''
220+techcrunch.com
221+'''
222+
223+from calibre.web.feeds.news import BasicNewsRecipe
224+
225+class TechCrunch(BasicNewsRecipe):
226+ title = 'TechCrunch'
227+ __author__ = 'Darko Miletic'
228+ description = 'IT News'
229+ publisher = 'AOL Inc.'
230+ category = 'news, IT'
231+ oldest_article = 2
232+ max_articles_per_feed = 200
233+ no_stylesheets = True
234+ encoding = 'utf8'
235+ use_embedded_content = False
236+ language = 'en'
237+ remove_empty_feeds = True
238+ publication_type = 'newsportal'
239+ masthead_url = 'http://s2.wp.com/wp-content/themes/vip/tctechcrunch2/images/site-logo.png'
240+ extra_css = """
241+ body{font-family: Helvetica,Arial,sans-serif }
242+ img{margin-bottom: 0.4em; display:block}
243+ """
244+
245+ conversion_options = {
246+ 'comment' : description
247+ , 'tags' : category
248+ , 'publisher' : publisher
249+ , 'language' : language
250+ }
251+
252+ remove_tags = [dict(name=['meta','link'])]
253+ remove_attributes=['lang']
254+ keep_only_tags=[
255+ dict(name='h1', attrs={'class':'headline'})
256+ ,dict(attrs={'class':['author','post-time','body-copy']})
257+ ]
258+
259+ feeds = [(u'News', u'http://feeds.feedburner.com/TechCrunch/')]
260+
261+ def preprocess_html(self, soup):
262+ for item in soup.findAll(style=True):
263+ del item['style']
264+ for item in soup.findAll('a'):
265+ limg = item.find('img')
266+ if item.string is not None:
267+ str = item.string
268+ item.replaceWith(str)
269+ else:
270+ if limg:
271+ item.name = 'div'
272+ item.attrs = []
273+ else:
274+ str = self.tag_to_string(item)
275+ item.replaceWith(str)
276+ for item in soup.findAll('img'):
277+ if not item.has_key('alt'):
278+ item['alt'] = 'image'
279+ return soup
280
281=== added file 'recipes/tijolaco.recipe'
282--- recipes/tijolaco.recipe 1970-01-01 00:00:00 +0000
283+++ recipes/tijolaco.recipe 2011-07-19 06:18:03 +0000
284@@ -0,0 +1,24 @@
285+from calibre.web.feeds.recipes import BasicNewsRecipe
286+
287+class Tijolaco(BasicNewsRecipe):
288+ title = u'Tijolaco.com'
289+ __author__ = u'Diniz Bortolotto'
290+ description = u'Posts do Blog Tijola\xe7o.com'
291+ oldest_article = 7
292+ max_articles_per_feed = 50
293+ encoding = 'utf8'
294+ publisher = u'Brizola Neto'
295+ category = 'politics, Brazil'
296+ language = 'pt_BR'
297+ publication_type = 'politics portal'
298+ use_embedded_content = False
299+ no_stylesheets = True
300+ remove_javascript = True
301+
302+ feeds = [(u'Blog Tijola\xe7o.com', u'http://feeds.feedburner.com/Tijolacoblog')]
303+
304+ reverse_article_order = True
305+
306+ keep_only_tags = [dict(name='div', attrs={'class':'post'})]
307+
308+ remove_tags = [dict(name='span', attrs={'class':'com'})]
309
310=== modified file 'recipes/time_magazine.recipe'
311--- recipes/time_magazine.recipe 2011-05-20 06:49:24 +0000
312+++ recipes/time_magazine.recipe 2011-07-19 06:18:03 +0000
313@@ -8,47 +8,33 @@
314
315 import re
316 from calibre.web.feeds.news import BasicNewsRecipe
317+from lxml import html
318
319 class Time(BasicNewsRecipe):
320 #recipe_disabled = ('This recipe has been disabled as TIME no longer'
321 # ' publish complete articles on the web.')
322 title = u'Time'
323- __author__ = 'Kovid Goyal and Sujata Raman'
324+ __author__ = 'Kovid Goyal'
325 description = 'Weekly magazine'
326 encoding = 'utf-8'
327 no_stylesheets = True
328 language = 'en'
329 remove_javascript = True
330
331- extra_css = ''' h1 {font-family:georgia,serif;color:#000000;}
332- .mainHd{font-family:georgia,serif;color:#000000;}
333- h2 {font-family:Arial,Sans-serif;}
334- .name{font-family:Arial,Sans-serif; font-size:x-small;font-weight:bold; }
335- .date{font-family:Arial,Sans-serif; font-size:x-small ;color:#999999;}
336- .byline{font-family:Arial,Sans-serif; font-size:x-small ;}
337- .photoBkt{ font-size:x-small ;}
338- .vertPhoto{font-size:x-small ;}
339- .credits{font-family:Arial,Sans-serif; font-size:x-small ;color:gray;}
340- .credit{font-family:Arial,Sans-serif; font-size:x-small ;color:gray;}
341- .artTxt{font-family:georgia,serif;}
342- #content{font-family:georgia,serif;}
343- .caption{font-family:georgia,serif; font-size:x-small;color:#333333;}
344- .credit{font-family:georgia,serif; font-size:x-small;color:#999999;}
345- a:link{color:#CC0000;}
346- .breadcrumb{font-family:Arial,Sans-serif;font-size:x-small;}
347- '''
348-
349-
350- keep_only_tags = [ dict(name ="div",attrs = {"id" :["content"]}) ,
351- dict(name ="div",attrs = {"class" :["artHd","artTxt","photoBkt","vertPhoto","image","copy"]}) ,]
352- remove_tags = [ dict(name ="div",attrs = {'class':['articleFooterNav','listsByTopic','articleTools2','relatedContent','sideContent','topBannerWrap','articlePagination','nextUp',"rtCol","pagination","enlarge","contentTools2",]}),
353- dict(name ="span",attrs = {'class':['see']}),
354- dict(name ="div",attrs = {'id':['header','articleSideBar',"articleTools","articleFooter","cmBotLt","quigoPackage"]}),
355- dict(name ="a",attrs = {'class':['listLink']}),
356- dict(name ="ul",attrs = {'id':['shareSocial','tabs']}),
357- dict(name ="li",attrs = {'class':['back']}),
358- dict(name ="ul",attrs = {'class':['navCount']}),
359- ]
360+
361+ keep_only_tags = [
362+ {
363+ 'class':['artHd', 'articleContent',
364+ 'entry-title','entry-meta', 'entry-content', 'thumbnail']
365+ },
366+ ]
367+ remove_tags = [
368+ {'class':['content-tools', 'quigo', 'see',
369+ 'first-tier-social-tools', 'navigation', 'enlarge lightbox']},
370+ {'id':['share-tools']},
371+ {'rel':'lightbox'},
372+ ]
373+
374 recursions = 10
375 match_regexps = [r'/[0-9,]+-(2|3|4|5|6|7|8|9)(,\d+){0,1}.html',r'http://www.time.com/time/specials/packages/article/.*']
376
377@@ -56,10 +42,11 @@
378 r'<meta .+/>'), lambda m:'')]
379
380 def parse_index(self):
381- soup = self.index_to_soup('http://www.time.com/time/magazine')
382- img = soup.find('a', title="View Large Cover", href=True)
383- if img is not None:
384- cover_url = 'http://www.time.com'+img['href']
385+ raw = self.index_to_soup('http://www.time.com/time/magazine', raw=True)
386+ root = html.fromstring(raw)
387+ img = root.xpath('//a[.="View Large Cover" and @href]')
388+ if img:
389+ cover_url = 'http://www.time.com' + img[0].get('href')
390 try:
391 nsoup = self.index_to_soup(cover_url)
392 img = nsoup.find('img', src=re.compile('archive/covers'))
393@@ -70,46 +57,48 @@
394
395
396 feeds = []
397- parent = soup.find(id='tocGuts')
398- for seched in parent.findAll(attrs={'class':'toc_seched'}):
399- section = self.tag_to_string(seched).capitalize()
400- articles = list(self.find_articles(seched))
401- feeds.append((section, articles))
402+ parent = root.xpath('//div[@class="content-main-aside"]')[0]
403+ for sec in parent.xpath(
404+ 'descendant::section[contains(@class, "sec-mag-section")]'):
405+ h3 = sec.xpath('./h3')
406+ if h3:
407+ section = html.tostring(h3[0], encoding=unicode,
408+ method='text').strip().capitalize()
409+ self.log('Found section', section)
410+ articles = list(self.find_articles(sec))
411+ if articles:
412+ feeds.append((section, articles))
413
414 return feeds
415
416- def find_articles(self, seched):
417- for a in seched.findNextSiblings( attrs={'class':['toc_hed','rule2']}):
418- if a.name in "div":
419- break
420- else:
421- yield {
422- 'title' : self.tag_to_string(a),
423- 'url' : 'http://www.time.com'+a['href'],
424- 'date' : '',
425- 'description' : self.article_description(a)
426- }
427-
428-
429-
430- def article_description(self, a):
431- ans = []
432- while True:
433- t = a.nextSibling
434- if t is None:
435- break
436- a = t
437- if getattr(t, 'name', False):
438- if t.get('class', '') == 'toc_parens' or t.name == 'br':
439- continue
440- if t.name in ('div', 'a'):
441- break
442- ans.append(self.tag_to_string(t))
443- else:
444- ans.append(unicode(t))
445- return u' '.join(ans).replace(u'\xa0', u'').strip()
446+ def find_articles(self, sec):
447+
448+ for article in sec.xpath('./article'):
449+ h2 = article.xpath('./*[@class="entry-title"]')
450+ if not h2: continue
451+ a = h2[0].xpath('./a[@href]')
452+ if not a: continue
453+ title = html.tostring(a[0], encoding=unicode,
454+ method='text').strip()
455+ if not title: continue
456+ url = a[0].get('href')
457+ if url.startswith('/'):
458+ url = 'http://www.time.com'+url
459+ desc = ''
460+ p = article.xpath('./*[@class="entry-content"]')
461+ if p:
462+ desc = html.tostring(p[0], encoding=unicode,
463+ method='text')
464+ self.log('\t', title, ':\n\t\t', desc)
465+ yield {
466+ 'title' : title,
467+ 'url' : url,
468+ 'date' : '',
469+ 'description' : desc
470+ }
471
472 def postprocess_html(self,soup,first):
473 for tag in soup.findAll(attrs ={'class':['artPag','pagination']}):
474 tag.extract()
475 return soup
476+
477
478=== added file 'recipes/vio_mundo.recipe'
479--- recipes/vio_mundo.recipe 1970-01-01 00:00:00 +0000
480+++ recipes/vio_mundo.recipe 2011-07-19 06:18:03 +0000
481@@ -0,0 +1,30 @@
482+import re
483+from calibre.web.feeds.news import BasicNewsRecipe
484+
485+class VioMundo(BasicNewsRecipe):
486+ title = 'Blog VioMundo'
487+ __author__ = 'Diniz Bortolotto'
488+ description = 'Posts do Blog VioMundo'
489+ publisher = 'Luiz Carlos Azenha'
490+ oldest_article = 5
491+ max_articles_per_feed = 20
492+ category = 'news, politics, Brazil'
493+ language = 'pt_BR'
494+ publication_type = 'news and politics portal'
495+ use_embedded_content = False
496+ no_stylesheets = True
497+ remove_javascript = True
498+
499+ feeds = [(u'Blog VioMundo', u'http://www.viomundo.com.br/feed')]
500+
501+ reverse_article_order = True
502+
503+ def print_version(self, url):
504+ return url + '/print/'
505+
506+ remove_tags_after = dict(id='BlogContent')
507+
508+ preprocess_regexps = [
509+ (re.compile(r'\|\ <u>.*</p>'),
510+ lambda match: '</p>')
511+ ]
512
513=== modified file 'recipes/wired_uk.recipe'
514--- recipes/wired_uk.recipe 2010-02-17 17:47:04 +0000
515+++ recipes/wired_uk.recipe 2011-07-19 06:18:03 +0000
516@@ -1,28 +1,29 @@
517-
518 __license__ = 'GPL v3'
519-__copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
520+__copyright__ = '2011, Starson17 <Starson17 at gmail.com>'
521 '''
522 www.wired.co.uk
523 '''
524
525 from calibre import strftime
526 from calibre.web.feeds.news import BasicNewsRecipe
527+import re
528
529 class Wired_UK(BasicNewsRecipe):
530 title = 'Wired Magazine - UK edition'
531- __author__ = 'Darko Miletic'
532+ __author__ = 'Starson17'
533+ __version__ = 'v1.30'
534+ __date__ = '15 July 2011'
535 description = 'Gaming news'
536 publisher = 'Conde Nast Digital'
537 category = 'news, games, IT, gadgets'
538- oldest_article = 32
539+ oldest_article = 40
540 max_articles_per_feed = 100
541 no_stylesheets = True
542 encoding = 'utf-8'
543 use_embedded_content = False
544- masthead_url = 'http://www.wired.co.uk/_/media/wired-logo_UK.gif'
545+ #masthead_url = 'http://www.wired.co.uk/_/media/wired-logo_UK.gif'
546 language = 'en_GB'
547- extra_css = ' body{font-family: Palatino,"Palatino Linotype","Times New Roman",Times,serif} img{margin-bottom: 0.8em } .img-descr{font-family: Tahoma,Arial,Helvetica,sans-serif; font-size: 0.6875em; display: block} '
548- index = 'http://www.wired.co.uk/wired-magazine.aspx'
549+ index = 'http://www.wired.co.uk'
550
551 conversion_options = {
552 'comment' : description
553@@ -31,44 +32,118 @@
554 , 'language' : language
555 }
556
557- keep_only_tags = [dict(name='div', attrs={'class':'article-box'})]
558- remove_tags = [
559- dict(name=['object','embed','iframe','link'])
560- ,dict(attrs={'class':['opts','comment','stories']})
561- ]
562- remove_tags_after = dict(name='div',attrs={'class':'stories'})
563+ keep_only_tags = [dict(name='div', attrs={'class':['layoutColumn1']})]
564+ remove_tags = [dict(name='div',attrs={'class':['articleSidebar1','commentAddBox linkit','commentCountBox commentCountBoxBig']})]
565+ remove_tags_after = dict(name='div',attrs={'class':['mainCopy entry-content','mainCopy']})
566+ '''
567 remove_attributes = ['height','width']
568-
569-
570+ ,dict(name=['object','embed','iframe','link'])
571+ ,dict(attrs={'class':['opts','comment','stories']})
572+ ]
573+ '''
574 def parse_index(self):
575 totalfeeds = []
576 soup = self.index_to_soup(self.index)
577- maincontent = soup.find('div',attrs={'class':'main-content'})
578+ recentcontent = soup.find('ul',attrs={'class':'linkList3'})
579 mfeed = []
580- if maincontent:
581- st = maincontent.find(attrs={'class':'most-wired-box'})
582- if st:
583- for itt in st.findAll('a',href=True):
584- url = 'http://www.wired.co.uk' + itt['href']
585- title = self.tag_to_string(itt)
586- description = ''
587- date = strftime(self.timefmt)
588- mfeed.append({
589- 'title' :title
590- ,'date' :date
591- ,'url' :url
592- ,'description':description
593- })
594- totalfeeds.append(('Articles', mfeed))
595+ if recentcontent:
596+ for li in recentcontent.findAll('li'):
597+ a = li.h2.a
598+ url = self.index + a['href'] + '?page=all'
599+ title = self.tag_to_string(a)
600+ description = ''
601+ date = strftime(self.timefmt)
602+ mfeed.append({
603+ 'title' :title
604+ ,'date' :date
605+ ,'url' :url
606+ ,'description':description
607+ })
608+ totalfeeds.append(('Wired UK Magazine Latest News', mfeed))
609+ popmagcontent = soup.findAll('div',attrs={'class':'sidebarLinkList'})
610+ magcontent = popmagcontent[1]
611+ mfeed2 = []
612+ if magcontent:
613+ a = magcontent.h3.a
614+ if a:
615+ url = self.index + a['href'] + '?page=all'
616+ title = self.tag_to_string(a)
617+ description = ''
618+ date = strftime(self.timefmt)
619+ mfeed2.append({
620+ 'title' :title
621+ ,'date' :date
622+ ,'url' :url
623+ ,'description':description
624+ })
625+ for li in magcontent.findAll('li'):
626+ a = li.a
627+ url = self.index + a['href'] + '?page=all'
628+ title = self.tag_to_string(a)
629+ description = ''
630+ date = strftime(self.timefmt)
631+ mfeed2.append({
632+ 'title' :title
633+ ,'date' :date
634+ ,'url' :url
635+ ,'description':description
636+ })
637+ totalfeeds.append(('Wired UK Magazine Features', mfeed2))
638+
639+ magsoup = self.index_to_soup(self.index + '/magazine')
640+ startcontent = magsoup.find('h3',attrs={'class':'magSubSectionTitle titleStart'}).parent
641+ mfeed3 = []
642+ if startcontent:
643+ for li in startcontent.findAll('li'):
644+ a = li.a
645+ url = self.index + a['href'] + '?page=all'
646+ title = self.tag_to_string(a)
647+ description = ''
648+ date = strftime(self.timefmt)
649+ mfeed3.append({
650+ 'title' :title
651+ ,'date' :date
652+ ,'url' :url
653+ ,'description':description
654+ })
655+ totalfeeds.append(('Wired UK Magazine More', mfeed3))
656+
657+ playcontent = magsoup.find('h3',attrs={'class':'magSubSectionTitle titlePlay'}).parent
658+ mfeed4 = []
659+ if playcontent:
660+ for li in playcontent.findAll('li'):
661+ a = li.a
662+ url = self.index + a['href'] + '?page=all'
663+ title = self.tag_to_string(a)
664+ description = ''
665+ date = strftime(self.timefmt)
666+ mfeed4.append({
667+ 'title' :title
668+ ,'date' :date
669+ ,'url' :url
670+ ,'description':description
671+ })
672+ totalfeeds.append(('Wired UK Magazine Play', mfeed4))
673 return totalfeeds
674
675 def get_cover_url(self):
676- cover_url = None
677- soup = self.index_to_soup(self.index)
678- cover_item = soup.find('span', attrs={'class':'cover'})
679+ cover_url = ''
680+ soup = self.index_to_soup(self.index + '/magazine/archive')
681+ cover_item = soup.find('div', attrs={'class':'image linkme'})
682 if cover_item:
683 cover_url = cover_item.img['src']
684 return cover_url
685
686- def print_version(self, url):
687- return url + '?page=all'
688+ def preprocess_html(self, soup):
689+ for tag in soup.findAll(name='p'):
690+ if tag.find(name='span', text=re.compile(r'This article was taken from.*', re.DOTALL|re.IGNORECASE)):
691+ tag.extract()
692+ return soup
693+
694+ extra_css = '''
695+ h1{font-family:Arial,Helvetica,sans-serif; font-weight:bold;font-size:large;}
696+ h2{font-family:Arial,Helvetica,sans-serif; font-weight:normal;font-size:small;}
697+ p{font-family:Arial,Helvetica,sans-serif;font-size:small;}
698+ body{font-family:Helvetica,Arial,sans-serif;font-size:small;}
699+ '''
700+
701
702=== modified file 'recipes/zeitde.recipe'
703--- recipes/zeitde.recipe 2010-12-14 16:32:16 +0000
704+++ recipes/zeitde.recipe 2011-07-19 06:18:03 +0000
705@@ -15,15 +15,16 @@
706 encoding = 'UTF-8'
707
708 __author__ = 'Martin Pitt, Sujata Raman, Ingo Paschke and Marc Toensing'
709+ no_stylesheets = True
710
711 max_articles_per_feed = 40
712
713 remove_tags = [
714- dict(name='iframe'),
715- dict(name='div', attrs={'class':["response","pagination block","pagenav","inline link", "copyright"] }),
716- dict(name='p', attrs={'class':["ressortbacklink", "copyright"] }),
717- dict(name='div', attrs={'id':["place_5","place_4","comments"]})
718- ]
719+ dict(name='iframe'),
720+ dict(name='div', attrs={'class':["response","pagination block","pagenav","inline link", "copyright"] }),
721+ dict(name='p', attrs={'class':["ressortbacklink", "copyright"] }),
722+ dict(name='div', attrs={'id':["place_5","place_4","comments"]})
723+ ]
724
725 keep_only_tags = [dict(id=['main'])]
726
727
728=== modified file 'recipes/zeitde_sub.recipe'
729--- recipes/zeitde_sub.recipe 2011-02-25 21:01:47 +0000
730+++ recipes/zeitde_sub.recipe 2011-07-19 06:18:03 +0000
731@@ -2,18 +2,21 @@
732 # -*- coding: utf-8 mode: python -*-
733
734 __license__ = 'GPL v3'
735-__copyright__ = '2010-2011, Steffen Siebert <calibre at steffensiebert.de>'
736+__copyright__ = '2010, Steffen Siebert <calibre at steffensiebert.de>'
737 __docformat__ = 'restructuredtext de'
738-__version__ = '1.2'
739+__version__ = '1.5'
740
741 """
742 Die Zeit EPUB
743 """
744
745-import os, urllib2, zipfile, re
746+import os, zipfile, re, cStringIO
747 from calibre.web.feeds.news import BasicNewsRecipe
748 from calibre.ptempfile import PersistentTemporaryFile
749 from calibre import walk
750+from urlparse import urlparse
751+from contextlib import closing
752+from calibre.utils.magick.draw import save_cover_data_to
753
754 class ZeitEPUBAbo(BasicNewsRecipe):
755
756@@ -22,49 +25,112 @@
757 language = 'de'
758 lang = 'de-DE'
759
760- __author__ = 'Steffen Siebert and Tobias Isenberg'
761+ __author__ = 'Steffen Siebert, revised by Tobias Isenberg (with some code by Kovid Goyal)'
762 needs_subscription = True
763
764 conversion_options = {
765 'no_default_epub_cover' : True,
766 # fixing the wrong left margin
767 'mobi_ignore_margins' : True,
768+ 'keep_ligatures' : True,
769 }
770
771 preprocess_regexps = [
772- # filtering for correct dashes
773- (re.compile(r' - '), lambda match: ' – '), # regular "Gedankenstrich"
774- (re.compile(r' -,'), lambda match: ' –,'), # "Gedankenstrich" before a comma
775- (re.compile(r'(?<=\d)-(?=\d)'), lambda match: '–'), # number-number
776+ # filtering for correct dashes ("Gedankenstrich" and "bis")
777+ (re.compile(u' (-|\u2212)(?=[ ,])'), lambda match: u' \u2013'),
778+ (re.compile(r'(?<=\d)-(?=\d)'), lambda match: u'\u2013'), # number-number
779+ (re.compile(u'(?<=\d,)-(?= ?\u20AC)'), lambda match: u'\u2013'), # ,- Euro
780+ # fix the number dash number dash for the title image that was broken by the previous line
781+ (re.compile(u'(?<=\d\d\d\d)\u2013(?=\d?\d\.png)'), lambda match: '-'),
782+ # filtering for certain dash cases
783+ (re.compile(r'Bild - Zeitung'), lambda match: 'Bild-Zeitung'), # the obvious
784+ (re.compile(r'EMail'), lambda match: 'E-Mail'), # the obvious
785+ (re.compile(r'SBahn'), lambda match: 'S-Bahn'), # the obvious
786+ (re.compile(r'UBoot'), lambda match: 'U-Boot'), # the obvious
787+ (re.compile(r'T Shirt'), lambda match: 'T-Shirt'), # the obvious
788+ (re.compile(r'TShirt'), lambda match: 'T-Shirt'), # the obvious
789+ # the next two lines not only fix errors but also create new ones. this is due to additional errors in
790+ # the typesetting such as missing commas or wrongly placed dashes. but more is fixed than broken.
791+ (re.compile(r'(?<!und|der|\w\w,) -(?=\w)'), lambda match: '-'), # space too much before a connecting dash
792+ (re.compile(r'(?<=\w)- (?!und\b|oder\b|wie\b|aber\b|auch\b|sondern\b|bis\b|&amp;|&\s|bzw\.|auf\b|eher\b)'), lambda match: '-'), # space too much after a connecting dash
793+ # filtering for missing spaces before the month in long dates
794+ (re.compile(u'(?<=\d)\.(?=(Januar|Februar|M\u00E4rz|April|Mai|Juni|Juli|August|September|Oktober|November|Dezember))'), lambda match: '. '),
795+ # filtering for other missing spaces
796+ (re.compile(r'Stuttgart21'), lambda match: 'Stuttgart 21'), # the obvious
797+ (re.compile(u'(?<=\d)(?=\u20AC)'), lambda match: u'\u2013'), # Zahl[no space]Euro
798+ (re.compile(r':(?=[^\d\s</])'), lambda match: ': '), # missing space after colon
799+ (re.compile(u'\u00AB(?=[^\-\.:;,\?!<\)\s])'), lambda match: u'\u00AB '), # missing space after closing quotation
800+ (re.compile(u'(?<=[^\s\(>])\u00BB'), lambda match: u' \u00BB'), # missing space before opening quotation
801+ (re.compile(r'(?<=[a-z])(?=(I|II|III|IV|V|VI|VII|VIII|IX|X|XI|XII|XIII|XIV|XV|XVI|XVII|XVIII|XIX|XX)\.)'), lambda match: ' '), # missing space before Roman numeral
802+ (re.compile(r'(?<=(I|V|X)\.)(?=[\w])'), lambda match: ' '), # missing space after Roman numeral
803+ (re.compile(r'(?<=(II|IV|VI|IX|XI|XV|XX)\.)(?=[\w])'), lambda match: ' '), # missing space after Roman numeral
804+ (re.compile(r'(?<=(III|VII|XII|XIV|XVI|XIX)\.)(?=[\w])'), lambda match: ' '), # missing space after Roman numeral
805+ (re.compile(r'(?<=(VIII|XIII|XVII)\.)(?=[\w])'), lambda match: ' '), # missing space after Roman numeral
806+ (re.compile(r'(?<=(XVIII)\.)(?=[\w])'), lambda match: ' '), # missing space after Roman numeral
807+ (re.compile(r'(?<=[A-Za-zÄÖÜäöü]),(?=[A-Za-zÄÖÜäöü])'), lambda match: ', '), # missing space after comma
808+ (re.compile(r'(?<=[a-zäöü])\.(?=[A-ZÄÖÜ][A-Za-zÄÖÜäöü])'), lambda match: '. '), # missing space after full-stop
809+ (re.compile(r'(?<=[uU]\.) (?=a\.)'), lambda match: u'\u2008'), # fix abbreviation that was potentially broken previously
810+ (re.compile(r'(?<=[iI]\.) (?=A\.)'), lambda match: u'\u2008'), # fix abbreviation that was potentially broken previously
811+ (re.compile(r'(?<=[zZ]\.) (?=B\.)'), lambda match: u'\u2008'), # fix abbreviation that was potentially broken previously
812+ (re.compile(r'(?<=\w\.) (?=[A-Z][a-z]*@)'), lambda match: ''), # fix e-mail address that was potentially broken previously
813+ (re.compile(r'(?<=\d)[Pp]rozent'), lambda match: ' Prozent'),
814+ (re.compile(r'\.\.\.\.+'), lambda match: '...'), # too many dots (....)
815+ (re.compile(r'(?<=[^\s])\.\.\.'), lambda match: ' ...'), # spaces before ...
816+ (re.compile(r'\.\.\.(?=[^\s])'), lambda match: '... '), # spaces after ...
817+ (re.compile(r'(?<=[\[\(]) \.\.\. (?=[\]\)])'), lambda match: '...'), # fix special cases of ... in brackets
818+ (re.compile(u'(?<=[\u00BB\u203A]) \.\.\.'), lambda match: '...'), # fix special cases of ... after a quotation mark
819+ (re.compile(u'\.\.\. (?=[\u00AB\u2039,])'), lambda match: '...'), # fix special cases of ... before a quotation mark or comma
820+ # fix missing spaces between numbers and any sort of units, possibly with dot
821+ (re.compile(r'(?<=\d)(?=(Femto|Piko|Nano|Mikro|Milli|Zenti|Dezi|Hekto|Kilo|Mega|Giga|Tera|Peta|Tausend|Trilli|Kubik|Quadrat|Meter|Uhr|Jahr|Schuljahr|Seite))'), lambda match: ' '),
822+ (re.compile(r'(?<=\d\.)(?=(Femto|Piko|Nano|Mikro|Milli|Zenti|Dezi|Hekto|Kilo|Mega|Giga|Tera|Peta|Tausend|Trilli|Kubik|Quadrat|Meter|Uhr|Jahr|Schuljahr|Seite))'), lambda match: ' '),
823+ # fix wrong spaces
824+ (re.compile(r'(?<=<p class="absatz">[A-ZÄÖÜ]) (?=[a-zäöü\-])'), lambda match: ''), # at beginning of paragraphs
825+ (re.compile(u' \u00AB'), lambda match: u'\u00AB '), # before closing quotation
826+ (re.compile(u'\u00BB '), lambda match: u' \u00BB'), # after opening quotation
827+ # filtering for spaces in large numbers for better readability
828+ (re.compile(r'(?<=\d\d)(?=\d\d\d[ ,\.;\)<\?!-])'), lambda match: u'\u2008'), # end of the number with some character following
829+ (re.compile(r'(?<=\d\d)(?=\d\d\d. )'), lambda match: u'\u2008'), # end of the number with full-stop following, then space is necessary (avoid file names)
830+ (re.compile(u'(?<=\d)(?=\d\d\d\u2008)'), lambda match: u'\u2008'), # next level
831+ (re.compile(u'(?<=\d)(?=\d\d\d\u2008)'), lambda match: u'\u2008'), # next level
832+ (re.compile(u'(?<=\d)(?=\d\d\d\u2008)'), lambda match: u'\u2008'), # next level
833+ (re.compile(u'(?<=\d)(?=\d\d\d\u2008)'), lambda match: u'\u2008'), # next level
834 # filtering for unicode characters that are missing on the Kindle,
835 # try to replace them with meaningful work-arounds
836- (re.compile(u'\u2080'), lambda match: '<span style="font-size: 50%;">0</span>'), # subscript-0
837- (re.compile(u'\u2081'), lambda match: '<span style="font-size: 50%;">1</span>'), # subscript-1
838- (re.compile(u'\u2082'), lambda match: '<span style="font-size: 50%;">2</span>'), # subscript-2
839- (re.compile(u'\u2083'), lambda match: '<span style="font-size: 50%;">3</span>'), # subscript-3
840- (re.compile(u'\u2084'), lambda match: '<span style="font-size: 50%;">4</span>'), # subscript-4
841- (re.compile(u'\u2085'), lambda match: '<span style="font-size: 50%;">5</span>'), # subscript-5
842- (re.compile(u'\u2086'), lambda match: '<span style="font-size: 50%;">6</span>'), # subscript-6
843- (re.compile(u'\u2087'), lambda match: '<span style="font-size: 50%;">7</span>'), # subscript-7
844- (re.compile(u'\u2088'), lambda match: '<span style="font-size: 50%;">8</span>'), # subscript-8
845- (re.compile(u'\u2089'), lambda match: '<span style="font-size: 50%;">9</span>'), # subscript-9
846+ (re.compile(u'\u2080'), lambda match: '<span style="font-size: 40%;">0</span>'), # subscript-0
847+ (re.compile(u'\u2081'), lambda match: '<span style="font-size: 40%;">1</span>'), # subscript-1
848+ (re.compile(u'\u2082'), lambda match: '<span style="font-size: 40%;">2</span>'), # subscript-2
849+ (re.compile(u'\u2083'), lambda match: '<span style="font-size: 40%;">3</span>'), # subscript-3
850+ (re.compile(u'\u2084'), lambda match: '<span style="font-size: 40%;">4</span>'), # subscript-4
851+ (re.compile(u'\u2085'), lambda match: '<span style="font-size: 40%;">5</span>'), # subscript-5
852+ (re.compile(u'\u2086'), lambda match: '<span style="font-size: 40%;">6</span>'), # subscript-6
853+ (re.compile(u'\u2087'), lambda match: '<span style="font-size: 40%;">7</span>'), # subscript-7
854+ (re.compile(u'\u2088'), lambda match: '<span style="font-size: 40%;">8</span>'), # subscript-8
855+ (re.compile(u'\u2089'), lambda match: '<span style="font-size: 40%;">9</span>'), # subscript-9
856+ # always chance CO2
857+ (re.compile(r'CO2'), lambda match: 'CO<span style="font-size: 40%;">2</span>'), # CO2
858+ # remove *** paragraphs
859+ (re.compile(r'<p class="absatz">\*\*\*</p>'), lambda match: ''),
860+ # better layout for the top line of each article
861+ (re.compile(u'(?<=DIE ZEIT N\u00B0 \d /) (?=\d\d)'), lambda match: ' 20'), # proper year in edition number
862+ (re.compile(u'(?<=DIE ZEIT N\u00B0 \d\d /) (?=\d\d)'), lambda match: ' 20'), # proper year in edition number
863+ (re.compile(u'(?<=>)(?=DIE ZEIT N\u00B0 \d\d / 20\d\d)'), lambda match: u' \u2014 '), # m-dash between category and DIE ZEIT
864 ]
865
866 def build_index(self):
867- domain = "http://premium.zeit.de"
868- url = domain + "/abovorteile/cgi-bin/_er_member/p4z.fpl?ER_Do=getUserData&ER_NextTemplate=login_ok"
869-
870+ domain = "https://premium.zeit.de"
871+ url = domain + "/abo/zeit_digital"
872 browser = self.get_browser()
873- browser.add_password("http://premium.zeit.de", self.username, self.password)
874-
875- try:
876- browser.open(url)
877- except urllib2.HTTPError:
878- self.report_progress(0,_("Can't login to download issue"))
879- raise ValueError('Failed to login, check your username and password')
880-
881- response = browser.follow_link(text="DIE ZEIT als E-Paper")
882- response = browser.follow_link(url_regex=re.compile('^http://contentserver.hgv-online.de/nodrm/fulfillment\\?distributor=zeit-online&orderid=zeit_online.*'))
883+
884+ # new login process
885+ response = browser.open(url)
886+ browser.select_form(nr=2)
887+ browser.form['name']=self.username
888+ browser.form['pass']=self.password
889+ browser.submit()
890+ # now find the correct file, we will still use the ePub file
891+ epublink = browser.find_link(text_regex=re.compile('.*Ausgabe als Datei im ePub-Format.*'))
892+ response = browser.follow_link(epublink)
893+ self.report_progress(1,_('next step'))
894
895 tmp = PersistentTemporaryFile(suffix='.epub')
896 self.report_progress(0,_('downloading epub'))
897@@ -104,9 +170,45 @@
898
899 # getting url of the cover
900 def get_cover_url(self):
901+ self.log.warning('Downloading cover')
902 try:
903- inhalt = self.index_to_soup('http://www.zeit.de/inhalt')
904- cover_url = inhalt.find('div', attrs={'class':'singlearchive clearfix'}).img['src'].replace('icon_','')
905+ self.log.warning('Trying PDF-based cover')
906+ domain = "https://premium.zeit.de"
907+ url = domain + "/abo/zeit_digital"
908+ browser = self.get_browser()
909+
910+ # new login process
911+ browser.open(url)
912+ browser.select_form(nr=2)
913+ browser.form['name']=self.username
914+ browser.form['pass']=self.password
915+ browser.submit()
916+ # actual cover search
917+ pdflink = browser.find_link(url_regex=re.compile('system/files/epaper/DZ/pdf/DZ_ePaper*'))
918+ cover_url = urlparse(pdflink.base_url)[0]+'://'+urlparse(pdflink.base_url)[1]+''+(urlparse(pdflink.url)[2]).replace('ePaper_','').replace('.pdf','_001.pdf')
919+ self.log.warning('PDF link found:')
920+ self.log.warning(cover_url)
921+ # download the cover (has to be here due to new login process)
922+ with closing(browser.open(cover_url)) as r:
923+ cdata = r.read()
924+ from calibre.ebooks.metadata.pdf import get_metadata
925+ stream = cStringIO.StringIO(cdata)
926+ cdata = None
927+ mi = get_metadata(stream)
928+ if mi.cover_data and mi.cover_data[1]:
929+ cdata = mi.cover_data[1]
930+
931+ cpath = os.path.join(self.output_dir, 'cover.jpg')
932+ save_cover_data_to(cdata, cpath)
933+ cover_url = cpath
934+
935 except:
936- cover_url = 'http://images.zeit.de/bilder/titelseiten_zeit/1946/001_001.jpg'
937+ self.log.warning('Trying low-res cover')
938+ try:
939+ inhalt = self.index_to_soup('http://www.zeit.de/inhalt')
940+ cover_url = inhalt.find('div', attrs={'class':'singlearchive clearfix'}).img['src'].replace('icon_','')
941+ except:
942+ self.log.warning('Using static old low-res cover')
943+ cover_url = 'http://images.zeit.de/bilder/titelseiten_zeit/1946/001_001.jpg'
944 return cover_url
945+
946
947=== modified file 'resources/default_tweaks.py'
948--- resources/default_tweaks.py 2011-06-30 16:38:28 +0000
949+++ resources/default_tweaks.py 2011-07-19 06:18:03 +0000
950@@ -366,3 +366,10 @@
951 # on at your own risk!
952 unified_title_toolbar_on_osx = False
953
954+#: Save original file when converting from same format to same format
955+# When calibre does a conversion from the same format to the same format, for
956+# example, from EPUB to EPUB, the original file is saved, so that in case the
957+# conversion is poor, you can tweak the settings and run it again. By setting
958+# this to False you can prevent calibre from saving the original file.
959+save_original_format = True
960+
961
962=== modified file 'resources/images/devices/kindle.jpg'
963Binary files resources/images/devices/kindle.jpg 2009-12-20 01:18:13 +0000 and resources/images/devices/kindle.jpg 2011-07-19 06:18:03 +0000 differ
964=== modified file 'resources/templates/fb2.xsl'
965--- resources/templates/fb2.xsl 2011-06-11 15:27:21 +0000
966+++ resources/templates/fb2.xsl 2011-07-19 06:18:03 +0000
967@@ -379,7 +379,8 @@
968 <!-- image -->
969 <xsl:template match="fb:image">
970 <div align="center">
971- <img border="1">
972+ <xsl:element name="img">
973+ <xsl:attribute name="border">1</xsl:attribute>
974 <xsl:choose>
975 <xsl:when test="starts-with(@xlink:href,'#')">
976 <xsl:attribute name="src"><xsl:value-of select="substring-after(@xlink:href,'#')"/></xsl:attribute>
977@@ -388,7 +389,10 @@
978 <xsl:attribute name="src"><xsl:value-of select="@xlink:href"/></xsl:attribute>
979 </xsl:otherwise>
980 </xsl:choose>
981- </img>
982+ <xsl:if test="@title">
983+ <xsl:attribute name="title"><xsl:value-of select="@title"/></xsl:attribute>
984+ </xsl:if>
985+ </xsl:element>
986 </div>
987 </xsl:template>
988 </xsl:stylesheet>
989
990=== modified file 'session.vim'
991--- session.vim 2011-07-10 19:29:15 +0000
992+++ session.vim 2011-07-19 06:18:03 +0000
993@@ -1,5 +1,5 @@
994 " Project wide builtins
995-let g:pyflakes_builtins += ["dynamic_property", "__", "P", "I", "lopen", "icu_lower", "icu_upper", "icu_title", "ngettext"]
996+let g:pyflakes_builtins = ["_", "dynamic_property", "__", "P", "I", "lopen", "icu_lower", "icu_upper", "icu_title", "ngettext"]
997
998 python << EOFPY
999 import os
1000@@ -15,7 +15,7 @@
1001 project_dir=project_dir, base_dir=base_dir)
1002
1003 def recipe_title_callback(raw):
1004- return eval(raw.decode('utf-8'))
1005+ return eval(raw.decode('utf-8')).replace(' ', '_')
1006
1007 vipy.session.add_content_browser('.r', ',r', 'Recipe',
1008 vipy.session.glob_based_iterator(os.path.join(project_dir, 'recipes', '*.recipe')),
1009
1010=== modified file 'setup/check.py'
1011--- setup/check.py 2011-07-10 19:29:15 +0000
1012+++ setup/check.py 2011-07-19 06:18:03 +0000
1013@@ -25,18 +25,11 @@
1014 return '%s:%s: %s'%(self.filename, self.lineno, self.msg)
1015
1016 def check_for_python_errors(code_string, filename):
1017- # Since compiler.parse does not reliably report syntax errors, use the
1018- # built in compiler first to detect those.
1019+ import _ast
1020+ # First, compile into an AST and handle syntax errors.
1021 try:
1022- try:
1023- compile(code_string, filename, "exec")
1024- except MemoryError:
1025- # Python 2.4 will raise MemoryError if the source can't be
1026- # decoded.
1027- if sys.version_info[:2] == (2, 4):
1028- raise SyntaxError(None)
1029- raise
1030- except (SyntaxError, IndentationError), value:
1031+ tree = compile(code_string, filename, "exec", _ast.PyCF_ONLY_AST)
1032+ except (SyntaxError, IndentationError) as value:
1033 msg = value.args[0]
1034
1035 (lineno, offset, text) = value.lineno, value.offset, value.text
1036@@ -47,13 +40,11 @@
1037 # bogus message that claims the encoding the file declared was
1038 # unknown.
1039 msg = "%s: problem decoding source" % filename
1040+
1041 return [Message(filename, lineno, msg)]
1042 else:
1043- # Okay, it's syntactically valid. Now parse it into an ast and check
1044- # it.
1045- import compiler
1046 checker = __import__('pyflakes.checker').checker
1047- tree = compiler.parse(code_string)
1048+ # Okay, it's syntactically valid. Now check it.
1049 w = checker.Checker(tree, filename)
1050 w.messages.sort(lambda a, b: cmp(a.lineno, b.lineno))
1051 return [Message(x.filename, x.lineno, x.message%x.message_args) for x in
1052
1053=== modified file 'setup/translations.py'
1054--- setup/translations.py 2011-07-10 19:29:15 +0000
1055+++ setup/translations.py 2011-07-19 06:18:03 +0000
1056@@ -8,11 +8,18 @@
1057
1058 import os, tempfile, shutil, subprocess, glob, re, time, textwrap
1059 from distutils import sysconfig
1060+from functools import partial
1061
1062 from setup import Command, __appname__, __version__
1063-from setup.build_environment import pyqt
1064-
1065-class POT(Command):
1066+
1067+def qt_sources():
1068+ qtdir = glob.glob('/usr/src/qt-*')[-1]
1069+ j = partial(os.path.join, qtdir)
1070+ return list(map(j, [
1071+ 'src/gui/widgets/qdialogbuttonbox.cpp',
1072+ ]))
1073+
1074+class POT(Command): # {{{
1075
1076 description = 'Update the .pot translation template'
1077 PATH = os.path.join(Command.SRC, __appname__, 'translations')
1078@@ -82,6 +89,8 @@
1079 time=time.strftime('%Y-%m-%d %H:%M+%Z'))
1080
1081 files = self.source_files()
1082+ qt_inputs = qt_sources()
1083+
1084 with tempfile.NamedTemporaryFile() as fl:
1085 fl.write('\n'.join(files))
1086 fl.flush()
1087@@ -91,8 +100,14 @@
1088 subprocess.check_call(['xgettext', '-f', fl.name,
1089 '--default-domain=calibre', '-o', out.name, '-L', 'Python',
1090 '--from-code=UTF-8', '--sort-by-file', '--omit-header',
1091- '--no-wrap', '-k__',
1092+ '--no-wrap', '-k__', '--add-comments=NOTE:',
1093 ])
1094+ subprocess.check_call(['xgettext', '-j',
1095+ '--default-domain=calibre', '-o', out.name,
1096+ '--from-code=UTF-8', '--sort-by-file', '--omit-header',
1097+ '--no-wrap', '-kQT_TRANSLATE_NOOP:2',
1098+ ] + qt_inputs)
1099+
1100 with open(out.name, 'rb') as f:
1101 src = f.read()
1102 os.remove(out.name)
1103@@ -102,10 +117,12 @@
1104 with open(pot, 'wb') as f:
1105 f.write(src)
1106 self.info('Translations template:', os.path.abspath(pot))
1107- return pot
1108-
1109-
1110-class Translations(POT):
1111+
1112+
1113+ return pot
1114+# }}}
1115+
1116+class Translations(POT): # {{{
1117 description='''Compile the translations'''
1118 DEST = os.path.join(os.path.dirname(POT.SRC), 'resources', 'localization',
1119 'locales')
1120@@ -117,7 +134,6 @@
1121 locale = os.path.splitext(os.path.basename(po_file))[0]
1122 return locale, os.path.join(self.DEST, locale, 'messages.mo')
1123
1124-
1125 def run(self, opts):
1126 for f in self.po_files():
1127 locale, dest = self.mo_file(f)
1128@@ -126,7 +142,7 @@
1129 os.makedirs(base)
1130 self.info('\tCompiling translations for', locale)
1131 subprocess.check_call(['msgfmt', '-o', dest, f])
1132- if locale in ('en_GB', 'nds', 'te', 'yi'):
1133+ if locale in ('en_GB', 'en_CA', 'en_AU', 'si', 'ur', 'sc', 'ltg', 'nds', 'te', 'yi'):
1134 continue
1135 pycountry = self.j(sysconfig.get_python_lib(), 'pycountry',
1136 'locales', locale, 'LC_MESSAGES')
1137@@ -140,17 +156,6 @@
1138 self.warn('No ISO 639 translations for locale:', locale,
1139 '\nDo you have pycountry installed?')
1140
1141- base = os.path.join(pyqt.qt_data_dir, 'translations')
1142- qt_translations = glob.glob(os.path.join(base, 'qt_*.qm'))
1143- if not qt_translations:
1144- raise Exception('Could not find qt translations')
1145- for f in qt_translations:
1146- locale = self.s(self.b(f))[0][3:]
1147- dest = self.j(self.DEST, locale, 'LC_MESSAGES', 'qt.qm')
1148- if self.e(self.d(dest)) and self.newer(dest, f):
1149- self.info('\tCopying Qt translation for locale:', locale)
1150- shutil.copy2(f, dest)
1151-
1152 self.write_stats()
1153 self.freeze_locales()
1154
1155@@ -201,7 +206,7 @@
1156 for x in (i, j, d):
1157 if os.path.exists(x):
1158 os.remove(x)
1159-
1160+# }}}
1161
1162 class GetTranslations(Translations):
1163
1164
1165=== modified file 'src/calibre/__init__.py'
1166--- src/calibre/__init__.py 2011-06-27 21:34:52 +0000
1167+++ src/calibre/__init__.py 2011-07-19 06:18:03 +0000
1168@@ -341,7 +341,7 @@
1169 def browser(honor_time=True, max_time=2, mobile_browser=False, user_agent=None):
1170 '''
1171 Create a mechanize browser for web scraping. The browser handles cookies,
1172- refresh requests and ignores robots.txt. Also uses proxy if avaialable.
1173+ refresh requests and ignores robots.txt. Also uses proxy if available.
1174
1175 :param honor_time: If True honors pause time in refresh requests
1176 :param max_time: Maximum time in seconds to wait during a refresh request
1177@@ -474,7 +474,7 @@
1178 def my_unichr(num):
1179 try:
1180 return unichr(num)
1181- except ValueError:
1182+ except (ValueError, OverflowError):
1183 return u'?'
1184
1185 def entity_to_unicode(match, exceptions=[], encoding='cp1252',
1186
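The widened except clause in my_unichr() matters because, on Python 2, unichr() raises ValueError for out-of-range code points but OverflowError when the argument is too large for a C int, which a malformed numeric entity such as &#99999999999999999999; can produce. A small illustration, assuming Python 2 semantics:

    def my_unichr(num):
        try:
            return unichr(num)
        except (ValueError, OverflowError):
            return u'?'

    print(my_unichr(0x41))                    # returns u'A'
    print(my_unichr(0x110000))                # returns u'?' (ValueError: out of range)
    print(my_unichr(99999999999999999999))    # returns u'?' (OverflowError on Python 2)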
1187=== modified file 'src/calibre/constants.py'
1188--- src/calibre/constants.py 2011-07-08 17:01:37 +0000
1189+++ src/calibre/constants.py 2011-07-19 06:18:03 +0000
1190@@ -4,7 +4,7 @@
1191 __copyright__ = '2008, Kovid Goyal kovid@kovidgoyal.net'
1192 __docformat__ = 'restructuredtext en'
1193 __appname__ = u'calibre'
1194-numeric_version = (0, 8, 9)
1195+numeric_version = (0, 8, 10)
1196 __version__ = u'.'.join(map(unicode, numeric_version))
1197 __author__ = u"Kovid Goyal <kovid@kovidgoyal.net>"
1198
1199
1200=== modified file 'src/calibre/customize/builtins.py'
1201--- src/calibre/customize/builtins.py 2011-07-11 11:31:21 +0000
1202+++ src/calibre/customize/builtins.py 2011-07-19 06:18:03 +0000
1203@@ -1181,6 +1181,16 @@
1204 headquarters = 'US'
1205 formats = ['EPUB', 'MOBI', 'PDF']
1206
1207+class StoreChitankaStore(StoreBase):
1208+ name = u'Моята библиотека'
1209+ author = 'Alex Stanev'
1210+ description = u'Независим сайт за DRM свободна литература на български език'
1211+ actual_plugin = 'calibre.gui2.store.stores.chitanka_plugin:ChitankaStore'
1212+
1213+ drm_free_only = True
1214+ headquarters = 'BG'
1215+ formats = ['FB2', 'EPUB', 'TXT', 'SFB']
1216+
1217 class StoreDieselEbooksStore(StoreBase):
1218 name = 'Diesel eBooks'
1219 description = u'Instant access to over 2.4 million titles from hundreds of publishers including Harlequin, HarperCollins, John Wiley & Sons, McGraw-Hill, Simon & Schuster and Random House.'
1220@@ -1455,6 +1465,7 @@
1221 StoreBNStore,
1222 StoreBeamEBooksDEStore,
1223 StoreBeWriteStore,
1224+ StoreChitankaStore,
1225 StoreDieselEbooksStore,
1226 StoreEbookNLStore,
1227 StoreEbookscomStore,
1228
1229=== modified file 'src/calibre/db/backend.py'
1230--- src/calibre/db/backend.py 2011-07-04 01:03:52 +0000
1231+++ src/calibre/db/backend.py 2011-07-19 06:18:03 +0000
1232@@ -8,7 +8,7 @@
1233 __docformat__ = 'restructuredtext en'
1234
1235 # Imports {{{
1236-import os, shutil, uuid, json
1237+import os, shutil, uuid, json, glob, time, tempfile
1238 from functools import partial
1239
1240 import apsw
1241@@ -25,7 +25,7 @@
1242 from calibre.utils.date import utcfromtimestamp, parse_date
1243 from calibre.utils.filenames import is_case_sensitive
1244 from calibre.db.tables import (OneToOneTable, ManyToOneTable, ManyToManyTable,
1245- SizeTable, FormatsTable, AuthorsTable, IdentifiersTable)
1246+ SizeTable, FormatsTable, AuthorsTable, IdentifiersTable, CompositeTable)
1247 # }}}
1248
1249 '''
1250@@ -37,6 +37,8 @@
1251
1252 '''
1253
1254+SPOOL_SIZE = 30*1024*1024
1255+
1256 class DynamicFilter(object): # {{{
1257
1258 'No longer used, present for legacy compatibility'
1259@@ -478,7 +480,6 @@
1260 remove.append(data)
1261 continue
1262
1263- self.custom_column_label_map[data['label']] = data['num']
1264 self.custom_column_num_map[data['num']] = \
1265 self.custom_column_label_map[data['label']] = data
1266
1267@@ -613,10 +614,31 @@
1268
1269 tables['size'] = SizeTable('size', self.field_metadata['size'].copy())
1270
1271- for label, data in self.custom_column_label_map.iteritems():
1272- label = '#' + label
1273+ self.FIELD_MAP = {'id':0, 'title':1, 'authors':2, 'timestamp':3,
1274+ 'size':4, 'rating':5, 'tags':6, 'comments':7, 'series':8,
1275+ 'publisher':9, 'series_index':10, 'sort':11, 'author_sort':12,
1276+ 'formats':13, 'path':14, 'pubdate':15, 'uuid':16, 'cover':17,
1277+ 'au_map':18, 'last_modified':19, 'identifiers':20}
1278+
1279+ for k,v in self.FIELD_MAP.iteritems():
1280+ self.field_metadata.set_field_record_index(k, v, prefer_custom=False)
1281+
1282+ base = max(self.FIELD_MAP.itervalues())
1283+
1284+ for label_, data in self.custom_column_label_map.iteritems():
1285+ label = self.field_metadata.custom_field_prefix + label_
1286 metadata = self.field_metadata[label].copy()
1287 link_table = self.custom_table_names(data['num'])[1]
1288+ self.FIELD_MAP[data['num']] = base = base+1
1289+ self.field_metadata.set_field_record_index(label_, base,
1290+ prefer_custom=True)
1291+ if data['datatype'] == 'series':
1292+ # account for the series index column. Field_metadata knows that
1293+ # the series index is one larger than the series. If you change
1294+ # it here, be sure to change it there as well.
1295+ self.FIELD_MAP[str(data['num'])+'_index'] = base = base+1
1296+ self.field_metadata.set_field_record_index(label_+'_index', base,
1297+ prefer_custom=True)
1298
1299 if data['normalized']:
1300 if metadata['is_multiple']:
1301@@ -633,7 +655,16 @@
1302 metadata['table'] = link_table
1303 tables[label] = OneToOneTable(label, metadata)
1304 else:
1305- tables[label] = OneToOneTable(label, metadata)
1306+ if data['datatype'] == 'composite':
1307+ tables[label] = CompositeTable(label, metadata)
1308+ else:
1309+ tables[label] = OneToOneTable(label, metadata)
1310+
1311+ self.FIELD_MAP['ondevice'] = base = base+1
1312+ self.field_metadata.set_field_record_index('ondevice', base, prefer_custom=False)
1313+ self.FIELD_MAP['marked'] = base = base+1
1314+ self.field_metadata.set_field_record_index('marked', base, prefer_custom=False)
1315+
1316 # }}}
1317
1318 @property
1319@@ -732,5 +763,57 @@
1320 pprint.pprint(table.metadata)
1321 raise
1322
1323+ def format_abspath(self, book_id, fmt, fname, path):
1324+ path = os.path.join(self.library_path, path)
1325+ fmt = ('.' + fmt.lower()) if fmt else ''
1326+ fmt_path = os.path.join(path, fname+fmt)
1327+ if os.path.exists(fmt_path):
1328+ return fmt_path
1329+ try:
1330+ candidates = glob.glob(os.path.join(path, '*'+fmt))
1331+ except: # If path contains strange characters this throws an exc
1332+ candidates = []
1333+ if fmt and candidates and os.path.exists(candidates[0]):
1334+ shutil.copyfile(candidates[0], fmt_path)
1335+ return fmt_path
1336+
1337+ def format_metadata(self, book_id, fmt, fname, path):
1338+ path = self.format_abspath(book_id, fmt, fname, path)
1339+ ans = {}
1340+ if path is not None:
1341+ stat = os.stat(path)
1342+ ans['size'] = stat.st_size
1343+ ans['mtime'] = utcfromtimestamp(stat.st_mtime)
1344+ return ans
1345+
1346+ def cover(self, path, as_file=False, as_image=False,
1347+ as_path=False):
1348+ path = os.path.join(self.library_path, path, 'cover.jpg')
1349+ ret = None
1350+ if os.access(path, os.R_OK):
1351+ try:
1352+ f = lopen(path, 'rb')
1353+ except (IOError, OSError):
1354+ time.sleep(0.2)
1355+ f = lopen(path, 'rb')
1356+ with f:
1357+ if as_path:
1358+ pt = PersistentTemporaryFile('_dbcover.jpg')
1359+ with pt:
1360+ shutil.copyfileobj(f, pt)
1361+ return pt.name
1362+ if as_file:
1363+ ret = tempfile.SpooledTemporaryFile(SPOOL_SIZE)
1364+ shutil.copyfileobj(f, ret)
1365+ ret.seek(0)
1366+ else:
1367+ ret = f.read()
1368+ if as_image:
1369+ from PyQt4.Qt import QImage
1370+ i = QImage()
1371+ i.loadFromData(ret)
1372+ ret = i
1373+ return ret
1374+
1375 # }}}
1376
1377
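A usage sketch of the new cover() return modes on the backend, assuming `backend` is an initialized calibre.db.backend.DB and `path` is a book's relative library path as stored in the path field (the Cache normally makes these calls while holding its record lock):

    raw = backend.cover(path)                    # cover.jpg as a bytestring, or None
    spool = backend.cover(path, as_file=True)    # open SpooledTemporaryFile, rewound to 0
    tmp = backend.cover(path, as_path=True)      # path to a temporary copy; caller deletes it
    img = backend.cover(path, as_image=True)     # QImage (requires PyQt4)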
1378=== modified file 'src/calibre/db/cache.py'
1379--- src/calibre/db/cache.py 2011-07-05 04:59:54 +0000
1380+++ src/calibre/db/cache.py 2011-07-19 06:18:03 +0000
1381@@ -7,5 +7,380 @@
1382 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
1383 __docformat__ = 'restructuredtext en'
1384
1385-
1386-
1387+import os
1388+from collections import defaultdict
1389+from functools import wraps, partial
1390+
1391+from calibre.db.locking import create_locks, RecordLock
1392+from calibre.db.fields import create_field
1393+from calibre.ebooks.metadata.book.base import Metadata
1394+from calibre.utils.date import now
1395+
1396+def api(f):
1397+ f.is_cache_api = True
1398+ return f
1399+
1400+def read_api(f):
1401+ f = api(f)
1402+ f.is_read_api = True
1403+ return f
1404+
1405+def write_api(f):
1406+ f = api(f)
1407+ f.is_read_api = False
1408+ return f
1409+
1410+def wrap_simple(lock, func):
1411+ @wraps(func)
1412+ def ans(*args, **kwargs):
1413+ with lock:
1414+ return func(*args, **kwargs)
1415+ return ans
1416+
1417+
1418+class Cache(object):
1419+
1420+ def __init__(self, backend):
1421+ self.backend = backend
1422+ self.fields = {}
1423+ self.composites = set()
1424+ self.read_lock, self.write_lock = create_locks()
1425+ self.record_lock = RecordLock(self.read_lock)
1426+ self.format_metadata_cache = defaultdict(dict)
1427+
1428+ # Implement locking for all simple read/write API methods
1429+ # An unlocked version of the method is stored with the name starting
1430+ # with a leading underscore. Use the unlocked versions when the lock
1431+ # has already been acquired.
1432+ for name in dir(self):
1433+ func = getattr(self, name)
1434+ ira = getattr(func, 'is_read_api', None)
1435+ if ira is not None:
1436+ # Save original function
1437+ setattr(self, '_'+name, func)
1438+ # Wrap it in a lock
1439+ lock = self.read_lock if ira else self.write_lock
1440+ setattr(self, name, wrap_simple(lock, func))
1441+
1442+ @property
1443+ def field_metadata(self):
1444+ return self.backend.field_metadata
1445+
1446+ def _format_abspath(self, book_id, fmt):
1447+ '''
1448+ Return absolute path to the ebook file of format `format`
1449+
1450+ WARNING: This method will return a dummy path for a network backend DB,
1451+ so do not rely on it, use format(..., as_path=True) instead.
1452+
1453+ Currently used only in calibredb list, the viewer and the catalogs (via
1454+ get_data_as_dict()).
1455+
1456+ Apart from the viewer, I don't believe any of the others do any file
1457+ I/O with the results of this call.
1458+ '''
1459+ try:
1460+ name = self.fields['formats'].format_fname(book_id, fmt)
1461+ path = self._field_for('path', book_id).replace('/', os.sep)
1462+ except:
1463+ return None
1464+ if name and path:
1465+ return self.backend.format_abspath(book_id, fmt, name, path)
1466+
1467+ def _get_metadata(self, book_id, get_user_categories=True): # {{{
1468+ mi = Metadata(None)
1469+ author_ids = self._field_ids_for('authors', book_id)
1470+ aut_list = [self._author_data(i) for i in author_ids]
1471+ aum = []
1472+ aus = {}
1473+ aul = {}
1474+ for rec in aut_list:
1475+ aut = rec['name']
1476+ aum.append(aut)
1477+ aus[aut] = rec['sort']
1478+ aul[aut] = rec['link']
1479+ mi.title = self._field_for('title', book_id,
1480+ default_value=_('Unknown'))
1481+ mi.authors = aum
1482+ mi.author_sort = self._field_for('author_sort', book_id,
1483+ default_value=_('Unknown'))
1484+ mi.author_sort_map = aus
1485+ mi.author_link_map = aul
1486+ mi.comments = self._field_for('comments', book_id)
1487+ mi.publisher = self._field_for('publisher', book_id)
1488+ n = now()
1489+ mi.timestamp = self._field_for('timestamp', book_id, default_value=n)
1490+ mi.pubdate = self._field_for('pubdate', book_id, default_value=n)
1491+ mi.uuid = self._field_for('uuid', book_id,
1492+ default_value='dummy')
1493+ mi.title_sort = self._field_for('sort', book_id,
1494+ default_value=_('Unknown'))
1495+ mi.book_size = self._field_for('size', book_id, default_value=0)
1496+ mi.ondevice_col = self._field_for('ondevice', book_id, default_value='')
1497+ mi.last_modified = self._field_for('last_modified', book_id,
1498+ default_value=n)
1499+ formats = self._field_for('formats', book_id)
1500+ mi.format_metadata = {}
1501+ if not formats:
1502+ formats = None
1503+ else:
1504+ for f in formats:
1505+ mi.format_metadata[f] = self._format_metadata(book_id, f)
1506+ formats = ','.join(formats)
1507+ mi.formats = formats
1508+ mi.has_cover = _('Yes') if self._field_for('cover', book_id,
1509+ default_value=False) else ''
1510+ mi.tags = list(self._field_for('tags', book_id, default_value=()))
1511+ mi.series = self._field_for('series', book_id)
1512+ if mi.series:
1513+ mi.series_index = self._field_for('series_index', book_id,
1514+ default_value=1.0)
1515+ mi.rating = self._field_for('rating', book_id)
1516+ mi.set_identifiers(self._field_for('identifiers', book_id,
1517+ default_value={}))
1518+ mi.application_id = book_id
1519+ mi.id = book_id
1520+ composites = []
1521+ for key, meta in self.field_metadata.custom_iteritems():
1522+ mi.set_user_metadata(key, meta)
1523+ if meta['datatype'] == 'composite':
1524+ composites.append(key)
1525+ else:
1526+ mi.set(key, val=self._field_for(meta['label'], book_id),
1527+ extra=self._field_for(meta['label']+'_index', book_id))
1528+ for c in composites:
1529+ mi.set(c, val=self._composite_for(c, book_id, mi))
1530+
1531+ user_cat_vals = {}
1532+ if get_user_categories:
1533+ user_cats = self.backend.prefs['user_categories']
1534+ for ucat in user_cats:
1535+ res = []
1536+ for name,cat,ign in user_cats[ucat]:
1537+ v = mi.get(cat, None)
1538+ if isinstance(v, list):
1539+ if name in v:
1540+ res.append([name,cat])
1541+ elif name == v:
1542+ res.append([name,cat])
1543+ user_cat_vals[ucat] = res
1544+ mi.user_categories = user_cat_vals
1545+
1546+ return mi
1547+ # }}}
1548+
1549+ # Cache Layer API {{{
1550+
1551+ @api
1552+ def init(self):
1553+ '''
1554+ Initialize this cache with data from the backend.
1555+ '''
1556+ with self.write_lock:
1557+ self.backend.read_tables()
1558+
1559+ for field, table in self.backend.tables.iteritems():
1560+ self.fields[field] = create_field(field, table)
1561+ if table.metadata['datatype'] == 'composite':
1562+ self.composites.add(field)
1563+
1564+ self.fields['ondevice'] = create_field('ondevice', None)
1565+
1566+ @read_api
1567+ def field_for(self, name, book_id, default_value=None):
1568+ '''
1569+ Return the value of the field ``name`` for the book identified by
1570+ ``book_id``. If no such book exists or it has no defined value for the
1571+ field ``name`` or no such field exists, then ``default_value`` is returned.
1572+
1573+ The returned value for is_multiple fields is always a tuple.
1574+ '''
1575+ if self.composites and name in self.composites:
1576+ return self.composite_for(name, book_id,
1577+ default_value=default_value)
1578+ try:
1579+ return self.fields[name].for_book(book_id, default_value=default_value)
1580+ except (KeyError, IndexError):
1581+ return default_value
1582+
1583+ @read_api
1584+ def composite_for(self, name, book_id, mi=None, default_value=''):
1585+ try:
1586+ f = self.fields[name]
1587+ except KeyError:
1588+ return default_value
1589+
1590+ if mi is None:
1591+ return f.get_value_with_cache(book_id, partial(self._get_metadata,
1592+ get_user_categories=False))
1593+ else:
1594+ return f.render_composite(book_id, mi)
1595+
1596+ @read_api
1597+ def field_ids_for(self, name, book_id):
1598+ '''
1599+ Return the ids (as a tuple) for the values that the field ``name`` has on the book
1600+ identified by ``book_id``. If there are no values, or no such book, or
1601+ no such field, an empty tuple is returned.
1602+ '''
1603+ try:
1604+ return self.fields[name].ids_for_book(book_id)
1605+ except (KeyError, IndexError):
1606+ return ()
1607+
1608+ @read_api
1609+ def books_for_field(self, name, item_id):
1610+ '''
1611+ Return all the books associated with the item identified by
1612+ ``item_id``, where the item belongs to the field ``name``.
1613+
1614+ Returned value is a tuple of book ids, or the empty tuple if the item
1615+ or the field does not exist.
1616+ '''
1617+ try:
1618+ return self.fields[name].books_for(item_id)
1619+ except (KeyError, IndexError):
1620+ return ()
1621+
1622+ @read_api
1623+ def all_book_ids(self):
1624+ '''
1625+ Frozen set of all known book ids.
1626+ '''
1627+ return frozenset(self.fields['uuid'].iter_book_ids())
1628+
1629+ @read_api
1630+ def all_field_ids(self, name):
1631+ '''
1632+ Frozen set of ids for all values in the field ``name``.
1633+ '''
1634+ return frozenset(iter(self.fields[name]))
1635+
1636+ @read_api
1637+ def author_data(self, author_id):
1638+ '''
1639+ Return author data as a dictionary with keys: name, sort, link
1640+
1641+ If no author with the specified id is found an empty dictionary is
1642+ returned.
1643+ '''
1644+ try:
1645+ return self.fields['authors'].author_data(author_id)
1646+ except (KeyError, IndexError):
1647+ return {}
1648+
1649+ @read_api
1650+ def format_metadata(self, book_id, fmt, allow_cache=True):
1651+ if not fmt:
1652+ return {}
1653+ fmt = fmt.upper()
1654+ if allow_cache:
1655+ x = self.format_metadata_cache[book_id].get(fmt, None)
1656+ if x is not None:
1657+ return x
1658+ try:
1659+ name = self.fields['formats'].format_fname(book_id, fmt)
1660+ path = self._field_for('path', book_id).replace('/', os.sep)
1661+ except:
1662+ return {}
1663+
1664+ ans = {}
1665+ if path and name:
1666+ ans = self.backend.format_metadata(book_id, fmt, name, path)
1667+ self.format_metadata_cache[book_id][fmt] = ans
1668+ return ans
1669+
1670+ @api
1671+ def get_metadata(self, book_id,
1672+ get_cover=False, get_user_categories=True, cover_as_data=False):
1673+ '''
1674+ Return metadata for the book identified by book_id as a :class:`Metadata` object.
1675+ Note that the list of formats is not verified. If get_cover is True,
1676+ the cover is returned, either a path to temp file as mi.cover or if
1677+ cover_as_data is True then as mi.cover_data.
1678+ '''
1679+
1680+ with self.read_lock:
1681+ mi = self._get_metadata(book_id, get_user_categories=get_user_categories)
1682+
1683+ if get_cover:
1684+ if cover_as_data:
1685+ cdata = self.cover(book_id)
1686+ if cdata:
1687+ mi.cover_data = ('jpeg', cdata)
1688+ else:
1689+ mi.cover = self.cover(book_id, as_path=True)
1690+
1691+ return mi
1692+
1693+ @api
1694+ def cover(self, book_id,
1695+ as_file=False, as_image=False, as_path=False):
1696+ '''
1697+ Return the cover image or None. By default, returns the cover as a
1698+ bytestring.
1699+
1700+ WARNING: Using as_path will copy the cover to a temp file and return
1701+ the path to the temp file. You should delete the temp file when you are
1702+ done with it.
1703+
1704+ :param as_file: If True return the image as an open file object (a SpooledTemporaryFile)
1705+ :param as_image: If True return the image as a QImage object
1706+ :param as_path: If True return the image as a path pointing to a
1707+ temporary file
1708+ '''
1709+ with self.read_lock:
1710+ try:
1711+ path = self._field_for('path', book_id).replace('/', os.sep)
1712+ except:
1713+ return None
1714+
1715+ with self.record_lock.lock(book_id):
1716+ return self.backend.cover(path, as_file=as_file, as_image=as_image,
1717+ as_path=as_path)
1718+
1719+ @read_api
1720+ def multisort(self, fields):
1721+ all_book_ids = frozenset(self._all_book_ids())
1722+ get_metadata = partial(self._get_metadata, get_user_categories=False)
1723+
1724+ sort_keys = tuple(self.fields[field[0]].sort_keys_for_books(get_metadata,
1725+ all_book_ids) for field in fields)
1726+
1727+ if len(sort_keys) == 1:
1728+ sk = sort_keys[0]
1729+ return sorted(all_book_ids, key=lambda i:sk[i], reverse=not
1730+ fields[0][1])
1731+ else:
1732+ return sorted(all_book_ids, key=partial(SortKey, fields, sort_keys))
1733+
1734+ # }}}
1735+
1736+class SortKey(object):
1737+
1738+ def __init__(self, fields, sort_keys, book_id):
1739+ self.orders = tuple(1 if f[1] else -1 for f in fields)
1740+ self.sort_key = tuple(sk[book_id] for sk in sort_keys)
1741+
1742+ def __cmp__(self, other):
1743+ for i, order in enumerate(self.orders):
1744+ ans = cmp(self.sort_key[i], other.sort_key[i])
1745+ if ans != 0:
1746+ return ans * order
1747+ return 0
1748+
1749+
1750+# Testing {{{
1751+
1752+def test(library_path):
1753+ from calibre.db.backend import DB
1754+ backend = DB(library_path)
1755+ cache = Cache(backend)
1756+ cache.init()
1757+ print ('All book ids:', cache.all_book_ids())
1758+
1759+if __name__ == '__main__':
1760+ from calibre.utils.config import prefs
1761+ test(prefs['library_path'])
1762+
1763+# }}}
1764
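The read_api/write_api decorators above only tag methods; the Cache constructor then rebinds every tagged method behind the appropriate lock and keeps the raw callable under a leading underscore for use when the lock is already held. A self-contained sketch of that pattern with made-up names (ToyCache is not part of the merge):

    from functools import wraps
    from threading import RLock

    def read_api(f):
        f.is_read_api = True
        return f

    def wrap_simple(lock, func):
        @wraps(func)
        def ans(*args, **kwargs):
            with lock:
                return func(*args, **kwargs)
        return ans

    class ToyCache(object):

        def __init__(self):
            self.lock = RLock()
            for name in dir(self):
                func = getattr(self, name)
                if getattr(func, 'is_read_api', False):
                    # keep the unlocked original, publish the locked wrapper
                    setattr(self, '_' + name, func)
                    setattr(self, name, wrap_simple(self.lock, func))

        @read_api
        def field_for(self, name):
            return name.upper()

    c = ToyCache()
    print(c.field_for('title'))    # acquires the lock
    print(c._field_for('title'))   # unlocked, for use when the lock is already held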
1765=== added file 'src/calibre/db/fields.py'
1766--- src/calibre/db/fields.py 1970-01-01 00:00:00 +0000
1767+++ src/calibre/db/fields.py 2011-07-19 06:18:03 +0000
1768@@ -0,0 +1,257 @@
1769+#!/usr/bin/env python
1770+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
1771+from __future__ import (unicode_literals, division, absolute_import,
1772+ print_function)
1773+from future_builtins import map
1774+
1775+__license__ = 'GPL v3'
1776+__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
1777+__docformat__ = 'restructuredtext en'
1778+
1779+from threading import Lock
1780+
1781+from calibre.db.tables import ONE_ONE, MANY_ONE, MANY_MANY
1782+from calibre.utils.icu import sort_key
1783+
1784+class Field(object):
1785+
1786+ def __init__(self, name, table):
1787+ self.name, self.table = name, table
1788+ self.has_text_data = self.metadata['datatype'] in ('text', 'comments',
1789+ 'series', 'enumeration')
1790+ self.table_type = self.table.table_type
1791+ dt = self.metadata['datatype']
1792+ self._sort_key = (sort_key if dt == 'text' else lambda x: x)
1793+
1794+ @property
1795+ def metadata(self):
1796+ return self.table.metadata
1797+
1798+ def for_book(self, book_id, default_value=None):
1799+ '''
1800+ Return the value of this field for the book identified by book_id.
1801+ When no value is found, returns ``default_value``.
1802+ '''
1803+ raise NotImplementedError()
1804+
1805+ def ids_for_book(self, book_id):
1806+ '''
1807+ Return a tuple of item ids for items associated with the book
1808+ identified by book_id. Returns an empty tuple if no such items are
1809+ found.
1810+ '''
1811+ raise NotImplementedError()
1812+
1813+ def books_for(self, item_id):
1814+ '''
1815+ Return the ids of all books associated with the item identified by
1816+ item_id as a tuple. An empty tuple is returned if no books are found.
1817+ '''
1818+ raise NotImplementedError()
1819+
1820+ def __iter__(self):
1821+ '''
1822+ Iterate over the ids for all values in this field
1823+ '''
1824+ raise NotImplementedError()
1825+
1826+ def sort_keys_for_books(self, get_metadata, all_book_ids):
1827+ '''
1828+ Return a mapping of book_id -> sort_key. The sort key is suitable for
1829+ use in sorting the list of all books by this field, via the python cmp
1830+ method.
1831+ '''
1832+ raise NotImplementedError()
1833+
1834+
1835+class OneToOneField(Field):
1836+
1837+ def for_book(self, book_id, default_value=None):
1838+ return self.table.book_col_map.get(book_id, default_value)
1839+
1840+ def ids_for_book(self, book_id):
1841+ return (book_id,)
1842+
1843+ def books_for(self, item_id):
1844+ return (item_id,)
1845+
1846+ def __iter__(self):
1847+ return self.table.book_col_map.iterkeys()
1848+
1849+ def iter_book_ids(self):
1850+ return self.table.book_col_map.iterkeys()
1851+
1852+ def sort_keys_for_books(self, get_metadata, all_book_ids):
1853+ return {id_ : self._sort_key(self.table.book_col_map.get(id_, '')) for id_ in
1854+ all_book_ids}
1855+
1856+class CompositeField(OneToOneField):
1857+
1858+ def __init__(self, *args, **kwargs):
1859+ OneToOneField.__init__(self, *args, **kwargs)
1860+
1861+ self._render_cache = {}
1862+ self._lock = Lock()
1863+
1864+ def render_composite(self, book_id, mi):
1865+ with self._lock:
1866+ ans = self._render_cache.get(book_id, None)
1867+ if ans is None:
1868+ ans = mi.get(self.metadata['label'])
1869+ with self._lock:
1870+ self._render_cache[book_id] = ans
1871+ return ans
1872+
1873+ def clear_cache(self):
1874+ with self._lock:
1875+ self._render_cache = {}
1876+
1877+ def pop_cache(self, book_id):
1878+ with self._lock:
1879+ self._render_cache.pop(book_id, None)
1880+
1881+ def get_value_with_cache(self, book_id, get_metadata):
1882+ with self._lock:
1883+ ans = self._render_cache.get(book_id, None)
1884+ if ans is None:
1885+ mi = get_metadata(book_id)
1886+ ans = mi.get(self.metadata['label'])
1887+ return ans
1888+
1889+ def sort_keys_for_books(self, get_metadata, all_book_ids):
1890+ return {id_ : sort_key(self.get_value_with_cache(id_, get_metadata)) for id_ in
1891+ all_book_ids}
1892+
1893+
1894+class OnDeviceField(OneToOneField):
1895+
1896+ def __init__(self, name, table):
1897+ self.name = name
1898+ self.book_on_device_func = None
1899+
1900+ def book_on_device(self, book_id):
1901+ if callable(self.book_on_device_func):
1902+ return self.book_on_device_func(book_id)
1903+ return None
1904+
1905+ def set_book_on_device_func(self, func):
1906+ self.book_on_device_func = func
1907+
1908+ def for_book(self, book_id, default_value=None):
1909+ loc = []
1910+ count = 0
1911+ on = self.book_on_device(book_id)
1912+ if on is not None:
1913+ m, a, b, count = on[:4]
1914+ if m is not None:
1915+ loc.append(_('Main'))
1916+ if a is not None:
1917+ loc.append(_('Card A'))
1918+ if b is not None:
1919+ loc.append(_('Card B'))
1920+ return ', '.join(loc) + ((' (%s books)'%count) if count > 1 else '')
1921+
1922+ def __iter__(self):
1923+ return iter(())
1924+
1925+ def iter_book_ids(self):
1926+ return iter(())
1927+
1928+ def sort_keys_for_books(self, get_metadata, all_book_ids):
1929+ return {id_ : self.for_book(id_) for id_ in
1930+ all_book_ids}
1931+
1932+class ManyToOneField(Field):
1933+
1934+ def for_book(self, book_id, default_value=None):
1935+ ids = self.table.book_col_map.get(book_id, None)
1936+ if ids is not None:
1937+ ans = self.table.id_map[ids]
1938+ else:
1939+ ans = default_value
1940+ return ans
1941+
1942+ def ids_for_book(self, book_id):
1943+ id_ = self.table.book_col_map.get(book_id, None)
1944+ if id_ is None:
1945+ return ()
1946+ return (id_,)
1947+
1948+ def books_for(self, item_id):
1949+ return self.table.col_book_map.get(item_id, ())
1950+
1951+ def __iter__(self):
1952+ return self.table.id_map.iterkeys()
1953+
1954+ def sort_keys_for_books(self, get_metadata, all_book_ids):
1955+ keys = {item_id : self._sort_key(val) for item_id, val in
1956+ self.table.id_map.iteritems()}
1957+ return {id_ : keys.get(
1958+ self.table.book_col_map.get(id_, None), '') for id_ in all_book_ids}
1959+
1960+class ManyToManyField(Field):
1961+
1962+ def __init__(self, *args, **kwargs):
1963+ Field.__init__(self, *args, **kwargs)
1964+ self.alphabetical_sort = self.name != 'authors'
1965+
1966+ def for_book(self, book_id, default_value=None):
1967+ ids = self.table.book_col_map.get(book_id, ())
1968+ if ids:
1969+ ans = tuple(self.table.id_map[i] for i in ids)
1970+ else:
1971+ ans = default_value
1972+ return ans
1973+
1974+ def ids_for_book(self, book_id):
1975+ return self.table.book_col_map.get(book_id, ())
1976+
1977+ def books_for(self, item_id):
1978+ return self.table.col_book_map.get(item_id, ())
1979+
1980+ def __iter__(self):
1981+ return self.table.id_map.iterkeys()
1982+
1983+ def sort_keys_for_books(self, get_metadata, all_book_ids):
1984+ keys = {item_id : self._sort_key(val) for item_id, val in
1985+ self.table.id_map.iteritems()}
1986+
1987+ def sort_key_for_book(book_id):
1988+ item_ids = self.table.book_col_map.get(book_id, ())
1989+ if self.alphabetical_sort:
1990+ item_ids = sorted(item_ids, key=keys.get)
1991+ return tuple(map(keys.get, item_ids))
1992+
1993+ return {id_ : sort_key_for_book(id_) for id_ in all_book_ids}
1994+
1995+
1996+class AuthorsField(ManyToManyField):
1997+
1998+ def author_data(self, author_id):
1999+ return {
2000+ 'name' : self.table.id_map[author_id],
2001+ 'sort' : self.table.asort_map[author_id],
2002+ 'link' : self.table.alink_map[author_id],
2003+ }
2004+
2005+class FormatsField(ManyToManyField):
2006+
2007+ def format_fname(self, book_id, fmt):
2008+ return self.table.fname_map[book_id][fmt.upper()]
2009+
2010+def create_field(name, table):
2011+ cls = {
2012+ ONE_ONE : OneToOneField,
2013+ MANY_ONE : ManyToOneField,
2014+ MANY_MANY : ManyToManyField,
2015+ }[table.table_type]
2016+ if name == 'authors':
2017+ cls = AuthorsField
2018+ elif name == 'ondevice':
2019+ cls = OnDeviceField
2020+ elif name == 'formats':
2021+ cls = FormatsField
2022+ elif table.metadata['datatype'] == 'composite':
2023+ cls = CompositeField
2024+ return cls(name, table)
2025+
2026
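A usage sketch of the public read API these Field classes back, assuming `cache` is an initialized Cache and book id 1 exists; create_field() picks the concrete class from the table's table_type plus the name/datatype special cases (AuthorsField, FormatsField, OnDeviceField, CompositeField):

    book_id = 1                                             # hypothetical id
    title = cache.field_for('title', book_id)               # answered by a OneToOneField
    author_ids = cache.field_ids_for('authors', book_id)    # AuthorsField (many-many)
    if author_ids:
        data = cache.author_data(author_ids[0])             # {'name': ..., 'sort': ..., 'link': ...}
        others = cache.books_for_field('authors', author_ids[0])
    epub_info = cache.format_metadata(book_id, 'EPUB')      # {'size': ..., 'mtime': ...} or {}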
2027=== modified file 'src/calibre/db/locking.py'
2028--- src/calibre/db/locking.py 2011-07-10 21:12:06 +0000
2029+++ src/calibre/db/locking.py 2011-07-19 06:18:03 +0000
2030@@ -7,7 +7,9 @@
2031 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
2032 __docformat__ = 'restructuredtext en'
2033
2034-from threading import Lock, Condition, current_thread
2035+from threading import Lock, Condition, current_thread, RLock
2036+from functools import partial
2037+from collections import Counter
2038
2039 class LockingError(RuntimeError):
2040 pass
2041@@ -37,7 +39,7 @@
2042 l = SHLock()
2043 return RWLockWrapper(l), RWLockWrapper(l, is_shared=False)
2044
2045-class SHLock(object):
2046+class SHLock(object): # {{{
2047 '''
2048 Shareable lock class. Used to implement the Multiple readers-single writer
2049 paradigm. As best as I can tell, neither writer nor reader starvation
2050@@ -79,6 +81,11 @@
2051 return self._acquire_exclusive(blocking)
2052 assert not (self.is_shared and self.is_exclusive)
2053
2054+ def owns_lock(self):
2055+ me = current_thread()
2056+ with self._lock:
2057+ return self._exclusive_owner is me or me in self._shared_owners
2058+
2059 def release(self):
2060 ''' Release the lock. '''
2061 # This decrements the appropriate lock counters, and if the lock
2062@@ -189,6 +196,8 @@
2063 def _return_waiter(self, waiter):
2064 self._free_waiters.append(waiter)
2065
2066+# }}}
2067+
2068 class RWLockWrapper(object):
2069
2070 def __init__(self, shlock, is_shared=True):
2071@@ -200,16 +209,124 @@
2072 return self
2073
2074 def __exit__(self, *args):
2075+ self.release()
2076+
2077+ def release(self):
2078 self._shlock.release()
2079
2080+ def owns_lock(self):
2081+ return self._shlock.owns_lock()
2082+
2083+class RecordLock(object):
2084+
2085+ '''
2086+ Lock records identified by hashable ids. To use
2087+
2088+ rl = RecordLock()
2089+
2090+ with rl.lock(some_id):
2091+ # do something
2092+
2093+ This will lock the record identified by some_id exclusively. The lock is
2094+ recursive, which means that you can lock the same record multiple times in
2095+ the same thread.
2096+
2097+ This class co-operates with the SHLock class. If you try to lock a record
2098+ in a thread that already holds the SHLock, a LockingError is raised. This
2099+ is to prevent the possibility of a cross-lock deadlock.
2100+
2101+ A cross-lock deadlock is still possible if you first lock a record and then
2102+ acquire the SHLock, but the usage pattern for this lock makes this highly
2103+ unlikely (this lock should be acquired immediately before any file I/O on
2104+ files in the library and released immediately after).
2105+ '''
2106+
2107+ class Wrap(object):
2108+
2109+ def __init__(self, release):
2110+ self.release = release
2111+
2112+ def __enter__(self):
2113+ return self
2114+
2115+ def __exit__(self, *args, **kwargs):
2116+ self.release()
2117+ self.release = None
2118+
2119+ def __init__(self, sh_lock):
2120+ self._lock = Lock()
2121+ # This is for recycling lock objects.
2122+ self._free_locks = [RLock()]
2123+ self._records = {}
2124+ self._counter = Counter()
2125+ self.sh_lock = sh_lock
2126+
2127+ def lock(self, record_id):
2128+ if self.sh_lock.owns_lock():
2129+ raise LockingError('Current thread already holds a shared lock,'
2130+ ' you cannot also ask for record lock as this could cause a'
2131+ ' deadlock.')
2132+ with self._lock:
2133+ l = self._records.get(record_id, None)
2134+ if l is None:
2135+ l = self._take_lock()
2136+ self._records[record_id] = l
2137+ self._counter[record_id] += 1
2138+ l.acquire()
2139+ return RecordLock.Wrap(partial(self.release, record_id))
2140+
2141+ def release(self, record_id):
2142+ with self._lock:
2143+ l = self._records.pop(record_id, None)
2144+ if l is None:
2145+ raise LockingError('No lock acquired for record %r'%record_id)
2146+ l.release()
2147+ self._counter[record_id] -= 1
2148+ if self._counter[record_id] > 0:
2149+ self._records[record_id] = l
2150+ else:
2151+ self._return_lock(l)
2152+
2153+ def _take_lock(self):
2154+ try:
2155+ return self._free_locks.pop()
2156+ except IndexError:
2157+ return RLock()
2158+
2159+ def _return_lock(self, lock):
2160+ self._free_locks.append(lock)
2161
2162 # Tests {{{
2163 if __name__ == '__main__':
2164 import time, random, unittest
2165 from threading import Thread
2166
2167- class TestSHLock(unittest.TestCase):
2168- """Testcases for SHLock class."""
2169+ class TestLock(unittest.TestCase):
2170+ """Testcases for Lock classes."""
2171+
2172+ def test_owns_locks(self):
2173+ lock = SHLock()
2174+ self.assertFalse(lock.owns_lock())
2175+ lock.acquire(shared=True)
2176+ self.assertTrue(lock.owns_lock())
2177+ lock.release()
2178+ self.assertFalse(lock.owns_lock())
2179+ lock.acquire(shared=False)
2180+ self.assertTrue(lock.owns_lock())
2181+ lock.release()
2182+ self.assertFalse(lock.owns_lock())
2183+
2184+ done = []
2185+ def test():
2186+ if not lock.owns_lock():
2187+ done.append(True)
2188+ lock.acquire()
2189+ t = Thread(target=test)
2190+ t.daemon = True
2191+ t.start()
2192+ t.join(1)
2193+ self.assertEqual(len(done), 1)
2194+ lock.release()
2195
2196 def test_multithread_deadlock(self):
2197 lock = SHLock()
2198@@ -345,8 +462,38 @@
2199 self.assertFalse(lock.is_shared)
2200 self.assertFalse(lock.is_exclusive)
2201
2202-
2203- suite = unittest.TestLoader().loadTestsFromTestCase(TestSHLock)
2204+ def test_record_lock(self):
2205+ shlock = SHLock()
2206+ lock = RecordLock(shlock)
2207+
2208+ shlock.acquire()
2209+ self.assertRaises(LockingError, lock.lock, 1)
2210+ shlock.release()
2211+ with lock.lock(1):
2212+ with lock.lock(1):
2213+ pass
2214+
2215+ def dolock():
2216+ with lock.lock(1):
2217+ time.sleep(0.1)
2218+
2219+ t = Thread(target=dolock)
2220+ t.daemon = True
2221+ with lock.lock(1):
2222+ t.start()
2223+ t.join(0.2)
2224+ self.assertTrue(t.is_alive())
2225+ t.join(0.11)
2226+ self.assertFalse(t.is_alive())
2227+
2228+ t = Thread(target=dolock)
2229+ t.daemon = True
2230+ with lock.lock(2):
2231+ t.start()
2232+ t.join(0.11)
2233+ self.assertFalse(t.is_alive())
2234+
2235+ suite = unittest.TestLoader().loadTestsFromTestCase(TestLock)
2236 unittest.TextTestRunner(verbosity=2).run(suite)
2237
2238 # }}}
2239
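A sketch of the lock discipline this module is built for, using only names from the merge: metadata reads happen under the shared lock, per-book file I/O happens under a RecordLock, and a thread must not ask for a record lock while it holds the shared lock (that combination raises LockingError, which is the deadlock guard the docstring describes):

    from calibre.db.locking import create_locks, RecordLock

    read_lock, write_lock = create_locks()
    record_lock = RecordLock(read_lock)   # same wiring as Cache.__init__
    book_id = 1                           # hypothetical id

    with read_lock:
        pass   # look up in-memory metadata here, no file I/O

    with record_lock.lock(book_id):
        pass   # copy or read this book's files here, then release promptly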
2240=== modified file 'src/calibre/db/tables.py'
2241--- src/calibre/db/tables.py 2011-07-03 16:16:09 +0000
2242+++ src/calibre/db/tables.py 2011-07-19 06:18:03 +0000
2243@@ -17,6 +17,8 @@
2244
2245 _c_speedup = plugins['speedup'][0]
2246
2247+ONE_ONE, MANY_ONE, MANY_MANY = xrange(3)
2248+
2249 def _c_convert_timestamp(val):
2250 if not val:
2251 return None
2252@@ -57,6 +59,8 @@
2253 timestamp, size, etc.
2254 '''
2255
2256+ table_type = ONE_ONE
2257+
2258 def read(self, db):
2259 self.book_col_map = {}
2260 idcol = 'id' if self.metadata['table'] == 'books' else 'book'
2261@@ -73,6 +77,17 @@
2262 'WHERE data.book=books.id) FROM books'):
2263 self.book_col_map[row[0]] = self.unserialize(row[1])
2264
2265+class CompositeTable(OneToOneTable):
2266+
2267+ def read(self, db):
2268+ self.book_col_map = {}
2269+ d = self.metadata['display']
2270+ self.composite_template = d['composite_template']
2271+ self.contains_html = d['contains_html']
2272+ self.make_category = d['make_category']
2273+ self.composite_sort = d['composite_sort']
2274+ self.use_decorations = d['use_decorations']
2275+
2276 class ManyToOneTable(Table):
2277
2278 '''
2279@@ -82,9 +97,10 @@
2280 Each book however has only one value for data of this type.
2281 '''
2282
2283+ table_type = MANY_ONE
2284+
2285 def read(self, db):
2286 self.id_map = {}
2287- self.extra_map = {}
2288 self.col_book_map = {}
2289 self.book_col_map = {}
2290 self.read_id_maps(db)
2291@@ -105,6 +121,9 @@
2292 self.col_book_map[row[1]].append(row[0])
2293 self.book_col_map[row[0]] = row[1]
2294
2295+ for key in tuple(self.col_book_map.iterkeys()):
2296+ self.col_book_map[key] = tuple(self.col_book_map[key])
2297+
2298 class ManyToManyTable(ManyToOneTable):
2299
2300 '''
2301@@ -113,6 +132,8 @@
2302 book. For example: tags or authors.
2303 '''
2304
2305+ table_type = MANY_MANY
2306+
2307 def read_maps(self, db):
2308 for row in db.conn.execute(
2309 'SELECT book, {0} FROM {1}'.format(
2310@@ -124,14 +145,21 @@
2311 self.book_col_map[row[0]] = []
2312 self.book_col_map[row[0]].append(row[1])
2313
2314+ for key in tuple(self.col_book_map.iterkeys()):
2315+ self.col_book_map[key] = tuple(self.col_book_map[key])
2316+
2317+ for key in tuple(self.book_col_map.iterkeys()):
2318+ self.book_col_map[key] = tuple(self.book_col_map[key])
2319+
2320 class AuthorsTable(ManyToManyTable):
2321
2322 def read_id_maps(self, db):
2323 self.alink_map = {}
2324+ self.asort_map = {}
2325 for row in db.conn.execute(
2326 'SELECT id, name, sort, link FROM authors'):
2327 self.id_map[row[0]] = row[1]
2328- self.extra_map[row[0]] = (row[2] if row[2] else
2329+ self.asort_map[row[0]] = (row[2] if row[2] else
2330 author_to_author_sort(row[1]))
2331 self.alink_map[row[0]] = row[3]
2332
2333@@ -141,14 +169,25 @@
2334 pass
2335
2336 def read_maps(self, db):
2337+ self.fname_map = {}
2338 for row in db.conn.execute('SELECT book, format, name FROM data'):
2339 if row[1] is not None:
2340- if row[1] not in self.col_book_map:
2341- self.col_book_map[row[1]] = []
2342- self.col_book_map[row[1]].append(row[0])
2343+ fmt = row[1].upper()
2344+ if fmt not in self.col_book_map:
2345+ self.col_book_map[fmt] = []
2346+ self.col_book_map[fmt].append(row[0])
2347 if row[0] not in self.book_col_map:
2348 self.book_col_map[row[0]] = []
2349- self.book_col_map[row[0]].append((row[1], row[2]))
2350+ self.book_col_map[row[0]].append(fmt)
2351+ if row[0] not in self.fname_map:
2352+ self.fname_map[row[0]] = {}
2353+ self.fname_map[row[0]][fmt] = row[2]
2354+
2355+ for key in tuple(self.col_book_map.iterkeys()):
2356+ self.col_book_map[key] = tuple(self.col_book_map[key])
2357+
2358+ for key in tuple(self.book_col_map.iterkeys()):
2359+ self.book_col_map[key] = tuple(self.book_col_map[key])
2360
2361 class IdentifiersTable(ManyToManyTable):
2362
2363@@ -162,6 +201,9 @@
2364 self.col_book_map[row[1]] = []
2365 self.col_book_map[row[1]].append(row[0])
2366 if row[0] not in self.book_col_map:
2367- self.book_col_map[row[0]] = []
2368- self.book_col_map[row[0]].append((row[1], row[2]))
2369+ self.book_col_map[row[0]] = {}
2370+ self.book_col_map[row[0]][row[1]] = row[2]
2371+
2372+ for key in tuple(self.col_book_map.iterkeys()):
2373+ self.col_book_map[key] = tuple(self.col_book_map[key])
2374
2375
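With these changes the multi-valued maps become immutable tuples once read, FormatsTable keys formats in upper case and keeps stored file names in a separate fname_map, and IdentifiersTable maps each book to a {scheme: value} dict. An assumed example of the resulting in-memory shapes (values are made up for illustration):

    book_col_map = {1: ('EPUB', 'MOBI')}                # FormatsTable.book_col_map
    col_book_map = {'EPUB': (1, 3), 'MOBI': (1,)}       # FormatsTable.col_book_map
    fname_map    = {1: {'EPUB': 'Book One - Author'}}   # FormatsTable.fname_map
    identifiers  = {1: {'isbn': '9780000000000'}}       # IdentifiersTable.book_col_map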
2376=== added file 'src/calibre/db/view.py'
2377--- src/calibre/db/view.py 1970-01-01 00:00:00 +0000
2378+++ src/calibre/db/view.py 2011-07-19 06:18:03 +0000
2379@@ -0,0 +1,109 @@
2380+#!/usr/bin/env python
2381+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
2382+from __future__ import (unicode_literals, division, absolute_import,
2383+ print_function)
2384+
2385+__license__ = 'GPL v3'
2386+__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
2387+__docformat__ = 'restructuredtext en'
2388+
2389+from functools import partial
2390+
2391+def sanitize_sort_field_name(field_metadata, field):
2392+ field = field_metadata.search_term_to_field_key(field.lower().strip())
2393+ # translate some fields to their hidden equivalent
2394+ field = {'title': 'sort', 'authors':'author_sort'}.get(field, field)
2395+ return field
2396+
2397+class View(object):
2398+
2399+ def __init__(self, cache):
2400+ self.cache = cache
2401+ self.marked_ids = {}
2402+ self._field_getters = {}
2403+ for col, idx in cache.backend.FIELD_MAP.iteritems():
2404+ if isinstance(col, int):
2405+ label = self.cache.backend.custom_column_num_map[col]['label']
2406+ label = (self.cache.backend.field_metadata.custom_field_prefix
2407+ + label)
2408+ self._field_getters[idx] = partial(self.get, label)
2409+ else:
2410+ try:
2411+ self._field_getters[idx] = {
2412+ 'id' : self._get_id,
2413+ 'au_map' : self.get_author_data,
2414+ 'ondevice': self.get_ondevice,
2415+ 'marked' : self.get_marked,
2416+ }[col]
2417+ except KeyError:
2418+ self._field_getters[idx] = partial(self.get, col)
2419+
2420+ self._map = list(self.cache.all_book_ids())
2421+ self._map_filtered = list(self._map)
2422+
2423+ @property
2424+ def field_metadata(self):
2425+ return self.cache.field_metadata
2426+
2427+ def _get_id(self, idx, index_is_id=True):
2428+ ans = idx if index_is_id else self.index_to_id(idx)
2429+ return ans
2430+
2431+ def get_field_map_field(self, row, col, index_is_id=True):
2432+ '''
2433+ Supports the legacy FIELD_MAP interface for getting metadata. Do not use
2434+ in new code.
2435+ '''
2436+ getter = self._field_getters[col]
2437+ return getter(row, index_is_id=index_is_id)
2438+
2439+ def index_to_id(self, idx):
2440+ return self._map_filtered[idx]
2441+
2442+ def get(self, field, idx, index_is_id=True, default_value=None):
2443+ id_ = idx if index_is_id else self.index_to_id(idx)
2444+ return self.cache.field_for(field, id_)
2445+
2446+ def get_ondevice(self, idx, index_is_id=True, default_value=''):
2447+ id_ = idx if index_is_id else self.index_to_id(idx)
2448+ return self.cache.field_for('ondevice', id_, default_value=default_value)
2449+
2450+ def get_marked(self, idx, index_is_id=True, default_value=None):
2451+ id_ = idx if index_is_id else self.index_to_id(idx)
2452+ return self.marked_ids.get(id_, default_value)
2453+
2454+ def get_author_data(self, idx, index_is_id=True, default_value=()):
2455+ '''
2456+ Return author data for all authors of the book identified by idx as a
2457+ tuple of dictionaries. The dictionaries should never be empty, unless
2458+ there is a bug somewhere. The list could be empty if idx points to a
2459+ non-existent book, or a book with no authors (though again a book with no
2460+ authors should never happen).
2461+
2462+ Each dictionary has the keys: name, sort, link. Link can be an empty
2463+ string.
2464+
2465+ default_value is ignored; this method always returns a tuple
2466+ '''
2467+ id_ = idx if index_is_id else self.index_to_id(idx)
2468+ with self.cache.read_lock:
2469+ ids = self.cache._field_ids_for('authors', id_)
2470+ ans = []
2471+ for id_ in ids:
2472+ ans.append(self.cache._author_data(id_))
2473+ return tuple(ans)
2474+
2475+ def multisort(self, fields=[], subsort=False):
2476+ fields = [(sanitize_sort_field_name(self.field_metadata, x), bool(y)) for x, y in fields]
2477+ keys = self.field_metadata.sortable_field_keys()
2478+ fields = [x for x in fields if x[0] in keys]
2479+ if subsort and 'sort' not in [x[0] for x in fields]:
2480+ fields += [('sort', True)]
2481+ if not fields:
2482+ fields = [('timestamp', False)]
2483+
2484+ sorted_book_ids = self.cache.multisort(fields)
2485+ sorted_book_ids
2486+ # TODO: change maps
2487+
2488+
2489
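A usage sketch for the new View.multisort(), assuming `cache` is an initialized Cache: field names are normalized through sanitize_sort_field_name (so 'title' sorts on the hidden 'sort' field and 'authors' on 'author_sort') before being handed to Cache.multisort(); as the TODO notes, applying the returned ordering to _map/_map_filtered is not wired up yet in this merge:

    view = View(cache)
    view.multisort([('series', True), ('title', True)], subsort=False)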
2490=== modified file 'src/calibre/devices/android/driver.py'
2491--- src/calibre/devices/android/driver.py 2011-07-10 15:37:25 +0000
2492+++ src/calibre/devices/android/driver.py 2011-07-19 06:18:03 +0000
2493@@ -39,7 +39,7 @@
2494 0x22b8 : { 0x41d9 : [0x216], 0x2d61 : [0x100], 0x2d67 : [0x100],
2495 0x41db : [0x216], 0x4285 : [0x216], 0x42a3 : [0x216],
2496 0x4286 : [0x216], 0x42b3 : [0x216], 0x42b4 : [0x216],
2497- 0x7086 : [0x0226], 0x70a8: [0x9999],
2498+ 0x7086 : [0x0226], 0x70a8: [0x9999], 0x42c4 : [0x216],
2499 },
2500
2501 # Sony Ericsson
2502@@ -60,6 +60,7 @@
2503 0x685e : [0x0400],
2504 0x6860 : [0x0400],
2505 0x6877 : [0x0400],
2506+ 0x689e : [0x0400],
2507 },
2508
2509 # Viewsonic
2510@@ -124,7 +125,8 @@
2511 'IDEOS_TABLET', 'MYTOUCH_4G', 'UMS_COMPOSITE', 'SCH-I800_CARD',
2512 '7', 'A956', 'A955', 'A43', 'ANDROID_PLATFORM', 'TEGRA_2',
2513 'MB860', 'MULTI-CARD', 'MID7015A', 'INCREDIBLE', 'A7EB', 'STREAK',
2514- 'MB525', 'ANDROID2.3', 'SGH-I997']
2515+ 'MB525', 'ANDROID2.3', 'SGH-I997', 'GT-I5800_CARD', 'MB612',
2516+ 'GT-S5830_CARD']
2517 WINDOWS_CARD_A_MEM = ['ANDROID_PHONE', 'GT-I9000_CARD', 'SGH-I897',
2518 'FILE-STOR_GADGET', 'SGH-T959', 'SAMSUNG_ANDROID', 'GT-P1000_CARD',
2519 'A70S', 'A101IT', '7', 'INCREDIBLE', 'A7EB', 'SGH-T849_CARD',
2520
2521=== modified file 'src/calibre/devices/kobo/driver.py'
2522--- src/calibre/devices/kobo/driver.py 2011-07-10 00:37:28 +0000
2523+++ src/calibre/devices/kobo/driver.py 2011-07-19 06:18:03 +0000
2524@@ -7,6 +7,7 @@
2525
2526 import os
2527 import sqlite3 as sqlite
2528+from contextlib import closing
2529
2530 from calibre.devices.usbms.books import BookList
2531 from calibre.devices.kobo.books import Book
2532@@ -22,7 +23,7 @@
2533 gui_name = 'Kobo Reader'
2534 description = _('Communicate with the Kobo Reader')
2535 author = 'Timothy Legge'
2536- version = (1, 0, 9)
2537+ version = (1, 0, 10)
2538
2539 dbversion = 0
2540 fwversion = 0
2541@@ -48,12 +49,16 @@
2542
2543 VIRTUAL_BOOK_EXTENSIONS = frozenset(['kobo'])
2544
2545- EXTRA_CUSTOMIZATION_MESSAGE = _('The Kobo supports only one collection '
2546- 'currently: the \"Im_Reading\" list. Create a tag called \"Im_Reading\" ')+\
2547- 'for automatic management'
2548+ EXTRA_CUSTOMIZATION_MESSAGE = [
2549+ _('The Kobo supports several collections including ')+\
2550+ 'Read, Closed, Im_Reading ' +\
2551+ _('Create tags for automatic management'),
2552+ ]
2553
2554 EXTRA_CUSTOMIZATION_DEFAULT = ', '.join(['tags'])
2555
2556+ OPT_COLLECTIONS = 0
2557+
2558 def initialize(self):
2559 USBMS.initialize(self)
2560 self.book_class = Book
2561@@ -188,77 +193,78 @@
2562 traceback.print_exc()
2563 return changed
2564
2565- connection = sqlite.connect(self.normalize_path(self._main_prefix + '.kobo/KoboReader.sqlite'))
2566-
2567- # return bytestrings if the content cannot the decoded as unicode
2568- connection.text_factory = lambda x: unicode(x, "utf-8", "ignore")
2569-
2570- cursor = connection.cursor()
2571-
2572- #query = 'select count(distinct volumeId) from volume_shortcovers'
2573- #cursor.execute(query)
2574- #for row in (cursor):
2575- # numrows = row[0]
2576- #cursor.close()
2577-
2578- # Determine the database version
2579- # 4 - Bluetooth Kobo Rev 2 (1.4)
2580- # 8 - WIFI KOBO Rev 1
2581- cursor.execute('select version from dbversion')
2582- result = cursor.fetchone()
2583- self.dbversion = result[0]
2584-
2585- debug_print("Database Version: ", self.dbversion)
2586- if self.dbversion >= 16:
2587- query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \
2588- 'ImageID, ReadStatus, ___ExpirationStatus, FavouritesIndex, Accessibility from content where ' \
2589- 'BookID is Null and ( ___ExpirationStatus <> "3" or ___ExpirationStatus is Null)'
2590- elif self.dbversion < 16 and self.dbversion >= 14:
2591- query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \
2592- 'ImageID, ReadStatus, ___ExpirationStatus, FavouritesIndex, "-1" as Accessibility from content where ' \
2593- 'BookID is Null and ( ___ExpirationStatus <> "3" or ___ExpirationStatus is Null)'
2594- elif self.dbversion < 14 and self.dbversion >= 8:
2595- query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \
2596- 'ImageID, ReadStatus, ___ExpirationStatus, "-1" as FavouritesIndex, "-1" as Accessibility from content where ' \
2597- 'BookID is Null and ( ___ExpirationStatus <> "3" or ___ExpirationStatus is Null)'
2598- else:
2599- query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \
2600- 'ImageID, ReadStatus, "-1" as ___ExpirationStatus, "-1" as FavouritesIndex, "-1" as Accessibility from content where BookID is Null'
2601-
2602- try:
2603- cursor.execute (query)
2604- except Exception as e:
2605- err = str(e)
2606- if not ('___ExpirationStatus' in err or 'FavouritesIndex' in err or
2607- 'Accessibility' in err):
2608- raise
2609- query= ('select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, '
2610- 'ImageID, ReadStatus, "-1" as ___ExpirationStatus, "-1" as '
2611- 'FavouritesIndex, "-1" as Accessibility from content where '
2612- 'BookID is Null')
2613- cursor.execute(query)
2614-
2615- changed = False
2616- for i, row in enumerate(cursor):
2617- # self.report_progress((i+1) / float(numrows), _('Getting list of books on device...'))
2618- if row[3].startswith("file:///usr/local/Kobo/help/"):
2619- # These are internal to the Kobo device and do not exist
2620- continue
2621- path = self.path_from_contentid(row[3], row[5], row[4], oncard)
2622- mime = mime_type_ext(path_to_ext(path)) if path.find('kepub') == -1 else 'application/epub+zip'
2623- # debug_print("mime:", mime)
2624-
2625- if oncard != 'carda' and oncard != 'cardb' and not row[3].startswith("file:///mnt/sd/"):
2626- changed = update_booklist(self._main_prefix, path, row[0], row[1], mime, row[2], row[5], row[6], row[7], row[4], row[8], row[9], row[10])
2627- # print "shortbook: " + path
2628- elif oncard == 'carda' and row[3].startswith("file:///mnt/sd/"):
2629- changed = update_booklist(self._card_a_prefix, path, row[0], row[1], mime, row[2], row[5], row[6], row[7], row[4], row[8], row[9], row[10])
2630-
2631- if changed:
2632- need_sync = True
2633-
2634- cursor.close()
2635- connection.close()
2636+ with closing(sqlite.connect(
2637+ self.normalize_path(self._main_prefix +
2638+ '.kobo/KoboReader.sqlite'))) as connection:
2639+
2640+ # return bytestrings if the content cannot the decoded as unicode
2641+ connection.text_factory = lambda x: unicode(x, "utf-8", "ignore")
2642+
2643+ cursor = connection.cursor()
2644+
2645+ #query = 'select count(distinct volumeId) from volume_shortcovers'
2646+ #cursor.execute(query)
2647+ #for row in (cursor):
2648+ # numrows = row[0]
2649+ #cursor.close()
2650+
2651+ # Determine the database version
2652+ # 4 - Bluetooth Kobo Rev 2 (1.4)
2653+ # 8 - WIFI KOBO Rev 1
2654+ cursor.execute('select version from dbversion')
2655+ result = cursor.fetchone()
2656+ self.dbversion = result[0]
2657+
2658+ debug_print("Database Version: ", self.dbversion)
2659+ if self.dbversion >= 16:
2660+ query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \
2661+ 'ImageID, ReadStatus, ___ExpirationStatus, FavouritesIndex, Accessibility from content where ' \
2662+ 'BookID is Null and ( ___ExpirationStatus <> "3" or ___ExpirationStatus is Null)'
2663+ elif self.dbversion < 16 and self.dbversion >= 14:
2664+ query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \
2665+ 'ImageID, ReadStatus, ___ExpirationStatus, FavouritesIndex, "-1" as Accessibility from content where ' \
2666+ 'BookID is Null and ( ___ExpirationStatus <> "3" or ___ExpirationStatus is Null)'
2667+ elif self.dbversion < 14 and self.dbversion >= 8:
2668+ query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \
2669+ 'ImageID, ReadStatus, ___ExpirationStatus, "-1" as FavouritesIndex, "-1" as Accessibility from content where ' \
2670+ 'BookID is Null and ( ___ExpirationStatus <> "3" or ___ExpirationStatus is Null)'
2671+ else:
2672+ query= 'select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, ' \
2673+ 'ImageID, ReadStatus, "-1" as ___ExpirationStatus, "-1" as FavouritesIndex, "-1" as Accessibility from content where BookID is Null'
2674+
2675+ try:
2676+ cursor.execute (query)
2677+ except Exception as e:
2678+ err = str(e)
2679+ if not ('___ExpirationStatus' in err or 'FavouritesIndex' in err or
2680+ 'Accessibility' in err):
2681+ raise
2682+ query= ('select Title, Attribution, DateCreated, ContentID, MimeType, ContentType, '
2683+ 'ImageID, ReadStatus, "-1" as ___ExpirationStatus, "-1" as '
2684+ 'FavouritesIndex, "-1" as Accessibility from content where '
2685+ 'BookID is Null')
2686+ cursor.execute(query)
2687+
2688+ changed = False
2689+ for i, row in enumerate(cursor):
2690+ # self.report_progress((i+1) / float(numrows), _('Getting list of books on device...'))
2691+ if row[3].startswith("file:///usr/local/Kobo/help/"):
2692+ # These are internal to the Kobo device and do not exist
2693+ continue
2694+ path = self.path_from_contentid(row[3], row[5], row[4], oncard)
2695+ mime = mime_type_ext(path_to_ext(path)) if path.find('kepub') == -1 else 'application/epub+zip'
2696+ # debug_print("mime:", mime)
2697+
2698+ if oncard != 'carda' and oncard != 'cardb' and not row[3].startswith("file:///mnt/sd/"):
2699+ changed = update_booklist(self._main_prefix, path, row[0], row[1], mime, row[2], row[5], row[6], row[7], row[4], row[8], row[9], row[10])
2700+ # print "shortbook: " + path
2701+ elif oncard == 'carda' and row[3].startswith("file:///mnt/sd/"):
2702+ changed = update_booklist(self._card_a_prefix, path, row[0], row[1], mime, row[2], row[5], row[6], row[7], row[4], row[8], row[9], row[10])
2703+
2704+ if changed:
2705+ need_sync = True
2706+
2707+ cursor.close()
2708
2709 # Remove books that are no longer in the filesystem. Cache contains
2710 # indices into the booklist if book not in filesystem, None otherwise
2711@@ -288,56 +294,56 @@
2712 # 2) content
2713
2714 debug_print('delete_via_sql: ContentID: ', ContentID, 'ContentType: ', ContentType)
2715- connection = sqlite.connect(self.normalize_path(self._main_prefix + '.kobo/KoboReader.sqlite'))
2716-
2717- # return bytestrings if the content cannot the decoded as unicode
2718- connection.text_factory = lambda x: unicode(x, "utf-8", "ignore")
2719-
2720- cursor = connection.cursor()
2721- t = (ContentID,)
2722- cursor.execute('select ImageID from content where ContentID = ?', t)
2723-
2724- ImageID = None
2725- for row in cursor:
2726- # First get the ImageID to delete the images
2727- ImageID = row[0]
2728- cursor.close()
2729-
2730- cursor = connection.cursor()
2731- if ContentType == 6 and self.dbversion < 8:
2732- # Delete the shortcover_pages first
2733- cursor.execute('delete from shortcover_page where shortcoverid in (select ContentID from content where BookID = ?)', t)
2734-
2735- #Delete the volume_shortcovers second
2736- cursor.execute('delete from volume_shortcovers where volumeid = ?', t)
2737-
2738- # Delete the rows from content_keys
2739- if self.dbversion >= 8:
2740- cursor.execute('delete from content_keys where volumeid = ?', t)
2741-
2742- # Delete the chapters associated with the book next
2743- t = (ContentID,)
2744- # Kobo does not delete the Book row (ie the row where the BookID is Null)
2745- # The next server sync should remove the row
2746- cursor.execute('delete from content where BookID = ?', t)
2747- try:
2748- cursor.execute('update content set ReadStatus=0, FirstTimeReading = \'true\', ___PercentRead=0, ___ExpirationStatus=3 ' \
2749- 'where BookID is Null and ContentID =?',t)
2750- except Exception as e:
2751- if 'no such column' not in str(e):
2752- raise
2753- cursor.execute('update content set ReadStatus=0, FirstTimeReading = \'true\', ___PercentRead=0 ' \
2754- 'where BookID is Null and ContentID =?',t)
2755-
2756-
2757- connection.commit()
2758-
2759- cursor.close()
2760- if ImageID == None:
2761- print "Error condition ImageID was not found"
2762- print "You likely tried to delete a book that the kobo has not yet added to the database"
2763-
2764- connection.close()
2765+ with closing(sqlite.connect(self.normalize_path(self._main_prefix +
2766+ '.kobo/KoboReader.sqlite'))) as connection:
2767+
2768+ # return bytestrings if the content cannot the decoded as unicode
2769+ connection.text_factory = lambda x: unicode(x, "utf-8", "ignore")
2770+
2771+ cursor = connection.cursor()
2772+ t = (ContentID,)
2773+ cursor.execute('select ImageID from content where ContentID = ?', t)
2774+
2775+ ImageID = None
2776+ for row in cursor:
2777+ # First get the ImageID to delete the images
2778+ ImageID = row[0]
2779+ cursor.close()
2780+
2781+ cursor = connection.cursor()
2782+ if ContentType == 6 and self.dbversion < 8:
2783+ # Delete the shortcover_pages first
2784+ cursor.execute('delete from shortcover_page where shortcoverid in (select ContentID from content where BookID = ?)', t)
2785+
2786+ #Delete the volume_shortcovers second
2787+ cursor.execute('delete from volume_shortcovers where volumeid = ?', t)
2788+
2789+ # Delete the rows from content_keys
2790+ if self.dbversion >= 8:
2791+ cursor.execute('delete from content_keys where volumeid = ?', t)
2792+
2793+ # Delete the chapters associated with the book next
2794+ t = (ContentID,)
2795+ # Kobo does not delete the Book row (ie the row where the BookID is Null)
2796+ # The next server sync should remove the row
2797+ cursor.execute('delete from content where BookID = ?', t)
2798+ try:
2799+ cursor.execute('update content set ReadStatus=0, FirstTimeReading = \'true\', ___PercentRead=0, ___ExpirationStatus=3 ' \
2800+ 'where BookID is Null and ContentID =?',t)
2801+ except Exception as e:
2802+ if 'no such column' not in str(e):
2803+ raise
2804+ cursor.execute('update content set ReadStatus=0, FirstTimeReading = \'true\', ___PercentRead=0 ' \
2805+ 'where BookID is Null and ContentID =?',t)
2806+
2807+
2808+ connection.commit()
2809+
2810+ cursor.close()
2811+ if ImageID == None:
2812+ print "Error condition ImageID was not found"
2813+ print "You likely tried to delete a book that the kobo has not yet added to the database"
2814+
2815 # If all this succeeds we need to delete the images files via the ImageID
2816 return ImageID
2817
2818@@ -664,50 +670,49 @@
2819 # Needs to be outside books collection as in the case of removing
2820 # the last book from the collection the list of books is empty
2821 # and the removal of the last book would not occur
2822- connection = sqlite.connect(self.normalize_path(self._main_prefix + '.kobo/KoboReader.sqlite'))
2823-
2824- # return bytestrings if the content cannot the decoded as unicode
2825- connection.text_factory = lambda x: unicode(x, "utf-8", "ignore")
2826-
2827- if collections:
2828-
2829- # Need to reset the collections outside the particular loops
2830- # otherwise the last item will not be removed
2831- self.reset_readstatus(connection, oncard)
2832- if self.dbversion >= 14:
2833- self.reset_favouritesindex(connection, oncard)
2834-
2835- # Process any collections that exist
2836- for category, books in collections.items():
2837- debug_print("Category: ", category, " id = ", readstatuslist.get(category))
2838- for book in books:
2839- debug_print(' Title:', book.title, 'category: ', category)
2840- if category not in book.device_collections:
2841- book.device_collections.append(category)
2842-
2843- extension = os.path.splitext(book.path)[1]
2844- ContentType = self.get_content_type_from_extension(extension) if extension != '' else self.get_content_type_from_path(book.path)
2845-
2846- ContentID = self.contentid_from_path(book.path, ContentType)
2847-
2848- if category in readstatuslist.keys():
2849- # Manage ReadStatus
2850- self.set_readstatus(connection, ContentID, readstatuslist.get(category))
2851- if category == 'Shortlist' and self.dbversion >= 14:
2852- # Manage FavouritesIndex/Shortlist
2853- self.set_favouritesindex(connection, ContentID)
2854- if category in accessibilitylist.keys():
2855- # Do not manage the Accessibility List
2856- pass
2857- else: # No collections
2858- # Since no collections exist the ReadStatus needs to be reset to 0 (Unread)
2859- debug_print("No Collections - reseting ReadStatus")
2860- self.reset_readstatus(connection, oncard)
2861- if self.dbversion >= 14:
2862- debug_print("No Collections - reseting FavouritesIndex")
2863- self.reset_favouritesindex(connection, oncard)
2864-
2865- connection.close()
2866+ with closing(sqlite.connect(self.normalize_path(self._main_prefix +
2867+ '.kobo/KoboReader.sqlite'))) as connection:
2868+
2869+            # return bytestrings if the content cannot be decoded as unicode
2870+ connection.text_factory = lambda x: unicode(x, "utf-8", "ignore")
2871+
2872+ if collections:
2873+
2874+ # Need to reset the collections outside the particular loops
2875+ # otherwise the last item will not be removed
2876+ self.reset_readstatus(connection, oncard)
2877+ if self.dbversion >= 14:
2878+ self.reset_favouritesindex(connection, oncard)
2879+
2880+ # Process any collections that exist
2881+ for category, books in collections.items():
2882+ debug_print("Category: ", category, " id = ", readstatuslist.get(category))
2883+ for book in books:
2884+ debug_print(' Title:', book.title, 'category: ', category)
2885+ if category not in book.device_collections:
2886+ book.device_collections.append(category)
2887+
2888+ extension = os.path.splitext(book.path)[1]
2889+ ContentType = self.get_content_type_from_extension(extension) if extension != '' else self.get_content_type_from_path(book.path)
2890+
2891+ ContentID = self.contentid_from_path(book.path, ContentType)
2892+
2893+ if category in readstatuslist.keys():
2894+ # Manage ReadStatus
2895+ self.set_readstatus(connection, ContentID, readstatuslist.get(category))
2896+ elif category == 'Shortlist' and self.dbversion >= 14:
2897+ # Manage FavouritesIndex/Shortlist
2898+ self.set_favouritesindex(connection, ContentID)
2899+ elif category in accessibilitylist.keys():
2900+ # Do not manage the Accessibility List
2901+ pass
2902+ else: # No collections
2903+ # Since no collections exist the ReadStatus needs to be reset to 0 (Unread)
2904+                debug_print("No Collections - resetting ReadStatus")
2905+ self.reset_readstatus(connection, oncard)
2906+ if self.dbversion >= 14:
2907+                    debug_print("No Collections - resetting FavouritesIndex")
2908+ self.reset_favouritesindex(connection, oncard)
2909
2910 # debug_print('Finished update_device_database_collections', collections_attributes)
2911
2912@@ -723,7 +728,7 @@
2913 opts = self.settings()
2914 if opts.extra_customization:
2915 collections = [x.lower().strip() for x in
2916- opts.extra_customization.split(',')]
2917+ opts.extra_customization[self.OPT_COLLECTIONS].split(',')]
2918 else:
2919 collections = []
2920
2921
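
The two hunks above replace manual connection.close() calls with contextlib.closing(), so the KoboReader.sqlite connection is released even when an exception escapes the block. A minimal standalone sketch of the pattern, assuming a throwaway local database and a toy table rather than the real Kobo schema:

    import sqlite3
    from contextlib import closing

    # closing() wraps any object that has a close() method in a context manager,
    # so the connection is released on normal exit and when an exception escapes,
    # which is what the rewritten delete/collection code relies on.
    with closing(sqlite3.connect('example.sqlite')) as connection:
        cursor = connection.cursor()
        cursor.execute('create table if not exists content (ContentID text, ImageID text)')
        cursor.execute('select ImageID from content where ContentID = ?', ('some-id',))
        rows = cursor.fetchall()
        cursor.close()
        connection.commit()
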
2922=== modified file 'src/calibre/devices/usbms/device.py'
2923--- src/calibre/devices/usbms/device.py 2011-06-27 20:59:32 +0000
2924+++ src/calibre/devices/usbms/device.py 2011-07-19 06:18:03 +0000
2925@@ -1077,8 +1077,13 @@
2926 settings = self.settings()
2927 template = self.save_template()
2928 if mdata.tags and _('News') in mdata.tags:
2929- today = time.localtime()
2930- template = "{title}_%d-%d-%d" % (today[0], today[1], today[2])
2931+ try:
2932+ p = mdata.pubdate
2933+ date = (p.year, p.month, p.day)
2934+ except:
2935+ today = time.localtime()
2936+ date = (today[0], today[1], today[2])
2937+ template = "{title}_%d-%d-%d" % date
2938 use_subdirs = self.SUPPORTS_SUB_DIRS and settings.use_subdirs
2939
2940 fname = sanitize(fname)
2941
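
The device.py change above prefers the book's publication date over today's date when naming news downloads, falling back to the current date only when pubdate is unavailable. A small illustration of the fallback, with FakeMetadata standing in for the calibre metadata object (a hypothetical name, not from the codebase):

    import time

    class FakeMetadata(object):   # hypothetical stand-in for the book metadata
        pubdate = None            # a datetime in real metadata; None here forces the fallback

    mdata = FakeMetadata()
    try:
        p = mdata.pubdate
        date = (p.year, p.month, p.day)
    except Exception:
        today = time.localtime()
        date = (today[0], today[1], today[2])
    template = "{title}_%d-%d-%d" % date
    print(template)    # falls back to today's date, e.g. {title}_2011-7-19
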
2942=== modified file 'src/calibre/devices/usbms/driver.py'
2943--- src/calibre/devices/usbms/driver.py 2011-06-09 16:10:44 +0000
2944+++ src/calibre/devices/usbms/driver.py 2011-07-19 06:18:03 +0000
2945@@ -94,11 +94,29 @@
2946 self.report_progress(1.0, _('Get device information...'))
2947 self.driveinfo = {}
2948 if self._main_prefix is not None:
2949- self.driveinfo['main'] = self._update_driveinfo_file(self._main_prefix, 'main')
2950- if self._card_a_prefix is not None:
2951- self.driveinfo['A'] = self._update_driveinfo_file(self._card_a_prefix, 'A')
2952- if self._card_b_prefix is not None:
2953- self.driveinfo['B'] = self._update_driveinfo_file(self._card_b_prefix, 'B')
2954+ try:
2955+ self.driveinfo['main'] = self._update_driveinfo_file(self._main_prefix, 'main')
2956+ except (IOError, OSError) as e:
2957+ raise IOError(_('Failed to access files in the main memory of'
2958+ ' your device. You should contact the device'
2959+ ' manufacturer for support. Common fixes are:'
2960+ ' try a different USB cable/USB port on your computer.'
2961+                            ' If your device has a "Reset to factory defaults" type'
2962+ ' of setting somewhere, use it. Underlying error: %s')
2963+ % e)
2964+ try:
2965+ if self._card_a_prefix is not None:
2966+ self.driveinfo['A'] = self._update_driveinfo_file(self._card_a_prefix, 'A')
2967+ if self._card_b_prefix is not None:
2968+ self.driveinfo['B'] = self._update_driveinfo_file(self._card_b_prefix, 'B')
2969+ except (IOError, OSError) as e:
2970+ raise IOError(_('Failed to access files on the SD card in your'
2971+ ' device. This can happen for many reasons. The SD card may be'
2972+ ' corrupted, it may be too large for your device, it may be'
2973+ ' write-protected, etc. Try a different SD card, or reformat'
2974+ ' your SD card using the FAT32 filesystem. Also make sure'
2975+ ' there are not too many files in the root of your SD card.'
2976+ ' Underlying error: %s') % e)
2977 return (self.get_gui_name(), '', '', '', self.driveinfo)
2978
2979 def set_driveinfo_name(self, location_code, name):
2980
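
The driver.py hunk above wraps the drive-info updates so that low-level filesystem failures surface as actionable messages instead of raw tracebacks. A rough sketch of the re-raise pattern, with update_driveinfo as a hypothetical stand-in for _update_driveinfo_file:

    def update_driveinfo(prefix):               # hypothetical stand-in for _update_driveinfo_file
        raise OSError('Permission denied')      # simulate an unreadable device filesystem

    try:
        try:
            update_driveinfo('/media/device/')
        except (IOError, OSError) as e:
            # Re-raise as an IOError with a user-facing explanation while keeping
            # the original error text, as the driver.py hunk does.
            raise IOError('Failed to access files in the main memory of your device.'
                          ' Underlying error: %s' % e)
    except IOError as e:
        print(e)
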
2981=== modified file 'src/calibre/ebooks/__init__.py'
2982--- src/calibre/ebooks/__init__.py 2011-04-21 19:40:56 +0000
2983+++ src/calibre/ebooks/__init__.py 2011-07-19 06:18:03 +0000
2984@@ -159,7 +159,7 @@
2985 return x
2986
2987 def calibre_cover(title, author_string, series_string=None,
2988- output_format='jpg', title_size=46, author_size=36):
2989+ output_format='jpg', title_size=46, author_size=36, logo_path=None):
2990 title = normalize(title)
2991 author_string = normalize(author_string)
2992 series_string = normalize(series_string)
2993@@ -167,7 +167,9 @@
2994 lines = [TextLine(title, title_size), TextLine(author_string, author_size)]
2995 if series_string:
2996 lines.append(TextLine(series_string, author_size))
2997- return create_cover_page(lines, I('library.png'), output_format='jpg')
2998+ if logo_path is None:
2999+ logo_path = I('library.png')
3000+ return create_cover_page(lines, logo_path, output_format='jpg')
3001
3002 UNIT_RE = re.compile(r'^(-*[0-9]*[.]?[0-9]*)\s*(%|em|ex|en|px|mm|cm|in|pt|pc)$')
3003
3004
3005=== modified file 'src/calibre/ebooks/chardet/__init__.py'
3006--- src/calibre/ebooks/chardet/__init__.py 2011-04-02 16:40:58 +0000
3007+++ src/calibre/ebooks/chardet/__init__.py 2011-07-19 06:18:03 +0000
3008@@ -38,8 +38,12 @@
3009 ENTITY_PATTERN = re.compile(r'&(\S+?);')
3010
3011 def strip_encoding_declarations(raw):
3012+ limit = 50*1024
3013 for pat in ENCODING_PATS:
3014- raw = pat.sub('', raw)
3015+ prefix = raw[:limit]
3016+ suffix = raw[limit:]
3017+ prefix = pat.sub('', prefix)
3018+ raw = prefix + suffix
3019 return raw
3020
3021 def substitute_entites(raw):
3022
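
The chardet hunk above restricts the encoding-declaration substitution to the first 50KB of the document, since declarations appear near the top and rewriting the entire text of a large book is wasted work. A minimal sketch under that assumption, using a toy pattern in place of calibre's real ENCODING_PATS:

    import re

    # Toy pattern standing in for calibre's real ENCODING_PATS.
    ENCODING_PATS = [re.compile(r'<\?xml[^>]*encoding=[^>]*\?>', re.IGNORECASE)]

    def strip_encoding_declarations(raw, limit=50 * 1024):
        # Encoding declarations sit at the top of a document, so only the first
        # `limit` characters are rewritten; the potentially huge remainder is
        # concatenated back unchanged.
        for pat in ENCODING_PATS:
            prefix, suffix = raw[:limit], raw[limit:]
            raw = pat.sub('', prefix) + suffix
        return raw

    print(strip_encoding_declarations('<?xml version="1.0" encoding="utf-8"?><html/>'))
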
3023=== modified file 'src/calibre/ebooks/conversion/cli.py'
3024--- src/calibre/ebooks/conversion/cli.py 2011-07-10 19:09:11 +0000
3025+++ src/calibre/ebooks/conversion/cli.py 2011-07-19 06:18:03 +0000
3026@@ -137,7 +137,9 @@
3027 'extra_css', 'smarten_punctuation',
3028 'margin_top', 'margin_left', 'margin_right',
3029 'margin_bottom', 'change_justification',
3030- 'insert_blank_line', 'remove_paragraph_spacing','remove_paragraph_spacing_indent_size',
3031+ 'insert_blank_line', 'insert_blank_line_size',
3032+ 'remove_paragraph_spacing',
3033+ 'remove_paragraph_spacing_indent_size',
3034 'asciiize',
3035 ]
3036 ),
3037
3038=== modified file 'src/calibre/ebooks/conversion/plumber.py'
3039--- src/calibre/ebooks/conversion/plumber.py 2011-07-06 18:10:08 +0000
3040+++ src/calibre/ebooks/conversion/plumber.py 2011-07-19 06:18:03 +0000
3041@@ -366,9 +366,9 @@
3042
3043 OptionRecommendation(name='remove_paragraph_spacing_indent_size',
3044 recommended_value=1.5, level=OptionRecommendation.LOW,
3045- help=_('When calibre removes inter paragraph spacing, it automatically '
3046+ help=_('When calibre removes blank lines between paragraphs, it automatically '
3047 'sets a paragraph indent, to ensure that paragraphs can be easily '
3048- 'distinguished. This option controls the width of that indent.')
3049+ 'distinguished. This option controls the width of that indent (in em).')
3050 ),
3051
3052 OptionRecommendation(name='prefer_metadata_cover',
3053@@ -384,6 +384,13 @@
3054 )
3055 ),
3056
3057+OptionRecommendation(name='insert_blank_line_size',
3058+ recommended_value=0.5, level=OptionRecommendation.LOW,
3059+ help=_('Set the height of the inserted blank lines (in em).'
3060+ ' The height of the lines between paragraphs will be twice the value'
3061+ ' set here.')
3062+ ),
3063+
3064 OptionRecommendation(name='remove_first_image',
3065 recommended_value=False, level=OptionRecommendation.LOW,
3066 help=_('Remove the first image from the input ebook. Useful if the '
3067@@ -602,7 +609,7 @@
3068 input_fmt = os.path.splitext(self.input)[1]
3069 if not input_fmt:
3070 raise ValueError('Input file must have an extension')
3071- input_fmt = input_fmt[1:].lower()
3072+ input_fmt = input_fmt[1:].lower().replace('original_', '')
3073 self.archive_input_tdir = None
3074 if input_fmt in ARCHIVE_FMTS:
3075 self.log('Processing archive...')
3076@@ -1048,6 +1055,7 @@
3077 with self.output_plugin:
3078 self.output_plugin.convert(self.oeb, self.output, self.input_plugin,
3079 self.opts, self.log)
3080+ self.oeb.clean_temp_files()
3081 self.ui_reporter(1.)
3082 run_plugins_on_postprocess(self.output, self.output_fmt)
3083
3084
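
The one-line change to input_fmt in plumber.py means files saved with an ORIGINAL_* format extension convert as the underlying format. A small sketch of the same normalization (the function name here is illustrative):

    import os

    def effective_input_fmt(path):
        # Saved originals in a calibre library carry extensions like
        # .original_epub; stripping the prefix lets them convert as the
        # underlying format, mirroring the plumber.py change above.
        ext = os.path.splitext(path)[1]
        if not ext:
            raise ValueError('Input file must have an extension')
        return ext[1:].lower().replace('original_', '')

    print(effective_input_fmt('book.ORIGINAL_EPUB'))  # -> 'epub'
    print(effective_input_fmt('book.mobi'))           # -> 'mobi'
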
3085=== modified file 'src/calibre/ebooks/htmlz/input.py'
3086--- src/calibre/ebooks/htmlz/input.py 2011-07-09 03:21:21 +0000
3087+++ src/calibre/ebooks/htmlz/input.py 2011-07-19 06:18:03 +0000
3088@@ -8,7 +8,7 @@
3089
3090 import os
3091
3092-from calibre import guess_type, walk
3093+from calibre import guess_type
3094 from calibre.customize.conversion import InputFormatPlugin
3095 from calibre.ebooks.chardet import xml_to_unicode
3096 from calibre.ebooks.metadata.opf2 import OPF
3097@@ -25,16 +25,50 @@
3098 accelerators):
3099 self.log = log
3100 html = u''
3101+ top_levels = []
3102
3103 # Extract content from zip archive.
3104 zf = ZipFile(stream)
3105 zf.extractall()
3106
3107- for x in walk('.'):
3108+ # Find the HTML file in the archive. It needs to be
3109+ # top level.
3110+ index = u''
3111+ multiple_html = False
3112+ # Get a list of all top level files in the archive.
3113+ for x in os.listdir('.'):
3114+ if os.path.isfile(x):
3115+ top_levels.append(x)
3116+        # Try to find an index file.
3117+ for x in top_levels:
3118+ if x.lower() in ('index.html', 'index.xhtml', 'index.htm'):
3119+ index = x
3120+ break
3121+ # Look for multiple HTML files in the archive. We look at the
3122+ # top level files only as only they matter in HTMLZ.
3123+ for x in top_levels:
3124 if os.path.splitext(x)[1].lower() in ('.html', '.xhtml', '.htm'):
3125- with open(x, 'rb') as tf:
3126- html = tf.read()
3127- break
3128+ # Set index to the first HTML file found if it's not
3129+ # called index.
3130+ if not index:
3131+ index = x
3132+ else:
3133+ multiple_html = True
3134+        # Warn the user if there are multiple HTML files in the archive. HTMLZ
3135+        # supports a single HTML file. A conversion of an HTMLZ archive with
3136+        # multiple HTML files probably won't turn out as the user expects. With
3137+        # multiple HTML files, ZIP input should be used in place of HTMLZ.
3138+ if multiple_html:
3139+ log.warn(_('Multiple HTML files found in the archive. Only %s will be used.') % index)
3140+
3141+ if index:
3142+ with open(index, 'rb') as tf:
3143+ html = tf.read()
3144+ else:
3145+ raise Exception(_('No top level HTML file found.'))
3146+
3147+ if not html:
3148+ raise Exception(_('Top level HTML file %s is empty') % index)
3149
3150 # Encoding
3151 if options.input_encoding:
3152@@ -75,7 +109,7 @@
3153 # Get the cover path from the OPF.
3154 cover_path = None
3155 opf = None
3156- for x in walk('.'):
3157+ for x in top_levels:
3158 if os.path.splitext(x)[1].lower() in ('.opf'):
3159 opf = x
3160 break
3161
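
The rewritten HTMLZ input plugin only considers top-level files, prefers an index.* file, and warns when several HTML files are present. A condensed sketch of that selection logic outside the plugin machinery (the function name and print-based warning are illustrative):

    import os

    def find_top_level_html(root='.'):
        # Mirrors the htmlz/input.py logic above: only files at the top level of
        # the extracted archive matter, an index.* file wins outright, otherwise
        # the first HTML file is used and extra HTML files trigger a warning.
        top_levels = [x for x in os.listdir(root) if os.path.isfile(os.path.join(root, x))]
        for x in top_levels:
            if x.lower() in ('index.html', 'index.xhtml', 'index.htm'):
                return x
        html = [x for x in top_levels
                if os.path.splitext(x)[1].lower() in ('.html', '.xhtml', '.htm')]
        if not html:
            raise Exception('No top level HTML file found.')
        if len(html) > 1:
            print('Warning: multiple HTML files found, only %s will be used' % html[0])
        return html[0]

    # e.g. find_top_level_html('/tmp/extracted_htmlz')
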
3162=== modified file 'src/calibre/ebooks/metadata/book/base.py'
3163--- src/calibre/ebooks/metadata/book/base.py 2011-07-10 19:29:15 +0000
3164+++ src/calibre/ebooks/metadata/book/base.py 2011-07-19 06:18:03 +0000
3165@@ -742,7 +742,7 @@
3166 ans += [('ISBN', unicode(self.isbn))]
3167 ans += [(_('Tags'), u', '.join([unicode(t) for t in self.tags]))]
3168 if self.series:
3169- ans += [_('Series'), unicode(self.series) + ' #%s'%self.format_series_index()]
3170+ ans += [(_('Series'), unicode(self.series) + ' #%s'%self.format_series_index())]
3171 ans += [(_('Language'), unicode(self.language))]
3172 if self.timestamp is not None:
3173 ans += [(_('Timestamp'), unicode(self.timestamp.isoformat(' ')))]
3174
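
The one-character fix above matters because ans is a list of (label, value) pairs; without the parentheses the label and the value were appended as two separate list items. A tiny demonstration of the difference:

    ans = []
    ans += ['Series', 'Foo #1']      # old code: two bare items appended
    ans += [('Series', 'Foo #1')]    # fixed code: one (label, value) tuple appended
    print(ans)  # ['Series', 'Foo #1', ('Series', 'Foo #1')]
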
3175=== modified file 'src/calibre/ebooks/mobi/debug.py'
3176--- src/calibre/ebooks/mobi/debug.py 2011-04-16 02:11:05 +0000
3177+++ src/calibre/ebooks/mobi/debug.py 2011-07-19 06:18:03 +0000
3178@@ -7,10 +7,11 @@
3179 __copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
3180 __docformat__ = 'restructuredtext en'
3181
3182-import struct, datetime
3183+import struct, datetime, sys, os
3184 from calibre.utils.date import utc_tz
3185 from calibre.ebooks.mobi.langcodes import main_language, sub_language
3186
3187+# PalmDB {{{
3188 class PalmDOCAttributes(object):
3189
3190 class Attr(object):
3191@@ -94,8 +95,9 @@
3192 ans.append('Number of records: %s'%self.number_of_records)
3193
3194 return '\n'.join(ans)
3195+# }}}
3196
3197-class Record(object):
3198+class Record(object): # {{{
3199
3200 def __init__(self, raw, header):
3201 self.offset, self.flags, self.uid = header
3202@@ -103,9 +105,11 @@
3203
3204 @property
3205 def header(self):
3206- return 'Offset: %d Flags: %d UID: %d'%(self.offset, self.flags,
3207- self.uid)
3208+ return 'Offset: %d Flags: %d UID: %d First 4 bytes: %r Size: %d'%(self.offset, self.flags,
3209+ self.uid, self.raw[:4], len(self.raw))
3210+# }}}
3211
3212+# EXTH {{{
3213 class EXTHRecord(object):
3214
3215 def __init__(self, type_, data):
3216@@ -189,9 +193,9 @@
3217 for r in self.records:
3218 ans.append(str(r))
3219 return '\n'.join(ans)
3220-
3221-
3222-class MOBIHeader(object):
3223+# }}}
3224+
3225+class MOBIHeader(object): # {{{
3226
3227 def __init__(self, record0):
3228 self.raw = record0.raw
3229@@ -311,7 +315,8 @@
3230 ans.append('Secondary index record: %d (null val: %d)'%(
3231 self.secondary_index_record, 0xffffffff))
3232 ans.append('Reserved2: %r'%self.reserved2)
3233- ans.append('First non-book record: %d'% self.first_non_book_record)
3234+ ans.append('First non-book record (null value: %d): %d'%(0xffffffff,
3235+ self.first_non_book_record))
3236 ans.append('Full name offset: %d'%self.fullname_offset)
3237 ans.append('Full name length: %d bytes'%self.fullname_length)
3238 ans.append('Langcode: %r'%self.locale_raw)
3239@@ -342,7 +347,8 @@
3240 ans.append('FLIS count: %d'% self.flis_count)
3241 ans.append('Unknown6: %r'% self.unknown6)
3242 ans.append('Extra data flags: %r'%self.extra_data_flags)
3243- ans.append('Primary index record: %d'%self.primary_index_record)
3244+ ans.append('Primary index record (null value: %d): %d'%(0xffffffff,
3245+ self.primary_index_record))
3246
3247 ans = '\n'.join(ans)
3248
3249@@ -355,8 +361,134 @@
3250
3251 ans += '\nRecord 0 length: %d'%len(self.raw)
3252 return ans
3253-
3254-class MOBIFile(object):
3255+# }}}
3256+
3257+class TagX(object): # {{{
3258+
3259+ def __init__(self, raw, control_byte_count):
3260+ self.tag = ord(raw[0])
3261+ self.num_values = ord(raw[1])
3262+ self.bmask = ord(raw[2])
3263+ self.bitmask = bin(self.bmask)
3264+ # End of file = 1 iff last entry
3265+ # When it is 1 all others are 0
3266+ self.eof = ord(raw[3])
3267+
3268+ self.is_eof = (self.eof == 1 and self.tag == 0 and self.num_values == 0
3269+ and self.bmask == 0)
3270+
3271+ def __repr__(self):
3272+ return 'TAGX(tag=%02d, num_values=%d, bitmask=%r (%d), eof=%d)' % (self.tag,
3273+ self.num_values, self.bitmask, self.bmask, self.eof)
3274+ # }}}
3275+
3276+class PrimaryIndexRecord(object): # {{{
3277+
3278+ def __init__(self, record):
3279+ self.record = record
3280+ raw = self.record.raw
3281+ if raw[:4] != b'INDX':
3282+ raise ValueError('Invalid Primary Index Record')
3283+
3284+ self.header_length, = struct.unpack('>I', raw[4:8])
3285+ self.unknown1 = raw[8:16]
3286+ self.index_type, = struct.unpack('>I', raw[16:20])
3287+ self.index_type_desc = {0: 'normal', 2:
3288+ 'inflection'}.get(self.index_type, 'unknown')
3289+ self.idxt_start, = struct.unpack('>I', raw[20:24])
3290+ self.index_count, = struct.unpack('>I', raw[24:28])
3291+ self.index_encoding_num, = struct.unpack('>I', raw[28:32])
3292+ self.index_encoding = {65001: 'utf-8', 1252:
3293+ 'cp1252'}.get(self.index_encoding_num, 'unknown')
3294+ if self.index_encoding == 'unknown':
3295+ raise ValueError(
3296+ 'Unknown index encoding: %d'%self.index_encoding_num)
3297+ self.locale_raw, = struct.unpack(b'>I', raw[32:36])
3298+ langcode = self.locale_raw
3299+ langid = langcode & 0xFF
3300+ sublangid = (langcode >> 10) & 0xFF
3301+ self.language = main_language.get(langid, 'ENGLISH')
3302+ self.sublanguage = sub_language.get(sublangid, 'NEUTRAL')
3303+ self.num_index_entries, = struct.unpack('>I', raw[36:40])
3304+ self.ordt_start, = struct.unpack('>I', raw[40:44])
3305+ self.ligt_start, = struct.unpack('>I', raw[44:48])
3306+ self.num_of_ligt_entries, = struct.unpack('>I', raw[48:52])
3307+ self.num_of_ctoc_blocks, = struct.unpack('>I', raw[52:56])
3308+ self.unknown2 = raw[56:180]
3309+ self.tagx_offset, = struct.unpack(b'>I', raw[180:184])
3310+ if self.tagx_offset != self.header_length:
3311+ raise ValueError('TAGX offset and header length disagree')
3312+ self.unknown3 = raw[184:self.header_length]
3313+
3314+ tagx = raw[self.header_length:]
3315+ if not tagx.startswith(b'TAGX'):
3316+ raise ValueError('Invalid TAGX section')
3317+ self.tagx_header_length, = struct.unpack('>I', tagx[4:8])
3318+ self.tagx_control_byte_count, = struct.unpack('>I', tagx[8:12])
3319+ tag_table = tagx[12:self.tagx_header_length]
3320+ if len(tag_table) % 4 != 0:
3321+ raise ValueError('Invalid Tag table')
3322+ num_tagx_entries = len(tag_table) // 4
3323+ self.tagx_entries = []
3324+ for i in range(num_tagx_entries):
3325+ self.tagx_entries.append(TagX(tag_table[i*4:(i+1)*4],
3326+ self.tagx_control_byte_count))
3327+ if self.tagx_entries and not self.tagx_entries[-1].is_eof:
3328+ raise ValueError('TAGX last entry is not EOF')
3329+
3330+ idxt0_pos = self.header_length+self.tagx_header_length
3331+ last_name_len, = struct.unpack(b'>B', raw[idxt0_pos])
3332+ count_pos = idxt0_pos+1+last_name_len
3333+ last_num = int(raw[idxt0_pos+1:count_pos], 16)
3334+ self.ncx_count, = struct.unpack(b'>H', raw[count_pos:count_pos+2])
3335+
3336+ if last_num != self.ncx_count - 1:
3337+ raise ValueError('Last id number in the NCX != NCX count - 1')
3338+ # There may be some alignment zero bytes between the end of the idxt0
3339+ # and self.idxt_start
3340+
3341+ idxt = raw[self.idxt_start:]
3342+ if idxt[:4] != b'IDXT':
3343+ raise ValueError('Invalid IDXT header')
3344+ length_check, = struct.unpack(b'>H', idxt[4:6])
3345+ if length_check != self.header_length + self.tagx_header_length:
3346+ raise ValueError('Length check failed')
3347+
3348+ def __str__(self):
3349+ ans = ['*'*20 + ' Index Header '+ '*'*20]
3350+ a = ans.append
3351+ a('Header length: %d'%self.header_length)
3352+ a('Unknown1: %r (%d bytes) (All zeros: %r)'%(self.unknown1,
3353+ len(self.unknown1), not bool(self.unknown1.replace(b'\0', '')) ))
3354+ a('Index Type: %s (%d)'%(self.index_type_desc, self.index_type))
3355+ a('Offset to IDXT start: %d'%self.idxt_start)
3356+ a('Number of index records: %d'%self.index_count)
3357+ a('Index encoding: %s (%d)'%(self.index_encoding,
3358+ self.index_encoding_num))
3359+ a('Index language: %s - %s (%s)'%(self.language, self.sublanguage,
3360+ hex(self.locale_raw)))
3361+ a('Number of index entries: %d'% self.num_index_entries)
3362+ a('ORDT start: %d'%self.ordt_start)
3363+ a('LIGT start: %d'%self.ligt_start)
3364+ a('Number of LIGT entries: %d'%self.num_of_ligt_entries)
3365+ a('Number of CTOC blocks: %d'%self.num_of_ctoc_blocks)
3366+ a('Unknown2: %r (%d bytes) (All zeros: %r)'%(self.unknown2,
3367+ len(self.unknown2), not bool(self.unknown2.replace(b'\0', '')) ))
3368+ a('TAGX offset: %d'%self.tagx_offset)
3369+ a('Unknown3: %r (%d bytes) (All zeros: %r)'%(self.unknown3,
3370+ len(self.unknown3), not bool(self.unknown3.replace(b'\0', '')) ))
3371+ a('\n\n')
3372+ a('*'*20 + ' TAGX Header (%d bytes)'%self.tagx_header_length+ '*'*20)
3373+ a('Header length: %d'%self.tagx_header_length)
3374+ a('Control byte count: %d'%self.tagx_control_byte_count)
3375+ for i in self.tagx_entries:
3376+ a('\t' + repr(i))
3377+ a('Number of entries in the NCX: %d'% self.ncx_count)
3378+
3379+ return '\n'.join(ans)
3380+ # }}}
3381+
3382+class MOBIFile(object): # {{{
3383
3384 def __init__(self, stream):
3385 self.raw = stream.read()
3386@@ -384,25 +516,40 @@
3387
3388 self.mobi_header = MOBIHeader(self.records[0])
3389
3390-
3391- def print_header(self):
3392- print (str(self.palmdb).encode('utf-8'))
3393- print ()
3394- print ('Record headers:')
3395+ self.primary_index_record = None
3396+ pir = self.mobi_header.primary_index_record
3397+ if pir != 0xffffffff:
3398+ self.primary_index_record = PrimaryIndexRecord(self.records[pir])
3399+
3400+
3401+ def print_header(self, f=sys.stdout):
3402+ print (str(self.palmdb).encode('utf-8'), file=f)
3403+ print (file=f)
3404+ print ('Record headers:', file=f)
3405 for i, r in enumerate(self.records):
3406- print ('%6d. %s'%(i, r.header))
3407+ print ('%6d. %s'%(i, r.header), file=f)
3408
3409- print ()
3410- print (str(self.mobi_header).encode('utf-8'))
3411+ print (file=f)
3412+ print (str(self.mobi_header).encode('utf-8'), file=f)
3413+# }}}
3414
3415 def inspect_mobi(path_or_stream):
3416 stream = (path_or_stream if hasattr(path_or_stream, 'read') else
3417 open(path_or_stream, 'rb'))
3418 f = MOBIFile(stream)
3419- f.print_header()
3420+ ddir = 'debug_' + os.path.splitext(os.path.basename(stream.name))[0]
3421+ if not os.path.exists(ddir):
3422+ os.mkdir(ddir)
3423+ with open(os.path.join(ddir, 'header.txt'), 'wb') as out:
3424+ f.print_header(f=out)
3425+ if f.primary_index_record is not None:
3426+ with open(os.path.join(ddir, 'primary_index_record.txt'), 'wb') as out:
3427+ print(str(f.primary_index_record), file=out)
3428+ print ('Debug data saved to:', ddir)
3429+
3430+def main():
3431+ inspect_mobi(sys.argv[1])
3432
3433 if __name__ == '__main__':
3434- import sys
3435- f = MOBIFile(open(sys.argv[1], 'rb'))
3436- f.print_header()
3437+ main()
3438
3439
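
The new TagX and PrimaryIndexRecord classes are plain big-endian struct parsing over raw record bytes. A minimal sketch of the same technique on fabricated bytes (the field layout follows the TagX constructor and the INDX header checks above; the byte values themselves are made up):

    from struct import unpack

    raw = b'\x01\x01\x01\x00'            # fabricated TAGX entry: tag, num_values, bitmask, eof
    tag, num_values, bmask, eof = (ord(raw[i:i+1]) for i in range(4))
    print('TAGX(tag=%02d, num_values=%d, bitmask=%r, eof=%d)' % (tag, num_values, bin(bmask), eof))

    header = b'INDX' + b'\x00\x00\x00\xc0'        # fabricated INDX prefix: magic + header length
    if header[:4] != b'INDX':
        raise ValueError('Invalid Primary Index Record')
    header_length, = unpack(b'>I', header[4:8])   # big-endian unsigned int, as in the hunk
    print(header_length)                          # 192
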
3440=== modified file 'src/calibre/ebooks/mobi/output.py'
3441--- src/calibre/ebooks/mobi/output.py 2010-12-12 19:09:43 +0000
3442+++ src/calibre/ebooks/mobi/output.py 2011-07-19 06:18:03 +0000
3443@@ -27,7 +27,7 @@
3444 ),
3445 OptionRecommendation(name='no_inline_toc',
3446 recommended_value=False, level=OptionRecommendation.LOW,
3447- help=_('Don\'t add Table of Contents to end of book. Useful if '
3448+ help=_('Don\'t add Table of Contents to the book. Useful if '
3449 'the book has its own table of contents.')),
3450 OptionRecommendation(name='toc_title', recommended_value=None,
3451 help=_('Title for any generated in-line table of contents.')
3452@@ -45,6 +45,12 @@
3453 'the MOBI output plugin will try to convert margins specified'
3454 ' in the input document, otherwise it will ignore them.')
3455 ),
3456+ OptionRecommendation(name='mobi_toc_at_start',
3457+ recommended_value=False,
3458+ help=_('When adding the Table of Contents to the book, add it at the start of the '
3459+ 'book instead of the end. Not recommended.')
3460+ ),
3461+
3462 ])
3463
3464 def check_for_periodical(self):
3465@@ -150,7 +156,7 @@
3466 # Fix up the periodical href to point to first section href
3467 toc.nodes[0].href = toc.nodes[0].nodes[0].href
3468
3469- # GR diagnostics
3470+ # diagnostics
3471 if self.opts.verbose > 3:
3472 self.dump_toc(toc)
3473 self.dump_manifest()
3474@@ -158,16 +164,14 @@
3475
3476 def convert(self, oeb, output_path, input_plugin, opts, log):
3477 self.log, self.opts, self.oeb = log, opts, oeb
3478- from calibre.ebooks.mobi.writer import PALM_MAX_IMAGE_SIZE, \
3479- MobiWriter, PALMDOC, UNCOMPRESSED
3480 from calibre.ebooks.mobi.mobiml import MobiMLizer
3481 from calibre.ebooks.oeb.transforms.manglecase import CaseMangler
3482 from calibre.ebooks.oeb.transforms.rasterize import SVGRasterizer, Unavailable
3483 from calibre.ebooks.oeb.transforms.htmltoc import HTMLTOCAdder
3484 from calibre.customize.ui import plugin_for_input_format
3485- imagemax = PALM_MAX_IMAGE_SIZE if opts.rescale_images else None
3486 if not opts.no_inline_toc:
3487- tocadder = HTMLTOCAdder(title=opts.toc_title)
3488+ tocadder = HTMLTOCAdder(title=opts.toc_title, position='start' if
3489+ opts.mobi_toc_at_start else 'end')
3490 tocadder(oeb, opts)
3491 mangler = CaseMangler()
3492 mangler(oeb, opts)
3493@@ -179,10 +183,14 @@
3494 mobimlizer = MobiMLizer(ignore_tables=opts.linearize_tables)
3495 mobimlizer(oeb, opts)
3496 self.check_for_periodical()
3497- write_page_breaks_after_item = not input_plugin is plugin_for_input_format('cbz')
3498- writer = MobiWriter(opts, imagemax=imagemax,
3499- compression=UNCOMPRESSED if opts.dont_compress else PALMDOC,
3500- prefer_author_sort=opts.prefer_author_sort,
3501- write_page_breaks_after_item=write_page_breaks_after_item)
3502+ write_page_breaks_after_item = input_plugin is not plugin_for_input_format('cbz')
3503+ from calibre.utils.config import tweaks
3504+ if tweaks.get('new_mobi_writer', False):
3505+ from calibre.ebooks.mobi.writer2.main import MobiWriter
3506+ MobiWriter
3507+ else:
3508+ from calibre.ebooks.mobi.writer import MobiWriter
3509+ writer = MobiWriter(opts,
3510+ write_page_breaks_after_item=write_page_breaks_after_item)
3511 writer(oeb, output_path)
3512
3513
3514=== modified file 'src/calibre/ebooks/mobi/writer.py'
3515--- src/calibre/ebooks/mobi/writer.py 2011-06-15 16:12:34 +0000
3516+++ src/calibre/ebooks/mobi/writer.py 2011-07-19 06:18:03 +0000
3517@@ -111,7 +111,8 @@
3518
3519 def rescale_image(data, maxsizeb, dimen=None):
3520 if dimen is not None:
3521- data = thumbnail(data, width=dimen, height=dimen)[-1]
3522+ data = thumbnail(data, width=dimen[0], height=dimen[1],
3523+ compression_quality=90)[-1]
3524 else:
3525 # Replace transparent pixels with white pixels and convert to JPEG
3526 data = save_cover_data_to(data, 'img.jpg', return_data=True)
3527@@ -141,7 +142,7 @@
3528 scale -= 0.05
3529 return data
3530
3531-class Serializer(object):
3532+class Serializer(object): # {{{
3533 NSRMAP = {'': None, XML_NS: 'xml', XHTML_NS: '', MBP_NS: 'mbp'}
3534
3535 def __init__(self, oeb, images, write_page_breaks_after_item=True):
3536@@ -172,6 +173,9 @@
3537 hrefs = self.oeb.manifest.hrefs
3538 buffer.write('<guide>')
3539 for ref in self.oeb.guide.values():
3540+ # The Kindle decides where to open a book based on the presence of
3541+ # an item in the guide that looks like
3542+ # <reference type="text" title="Start" href="chapter-one.xhtml"/>
3543 path = urldefrag(ref.href)[0]
3544 if path not in hrefs or hrefs[path].media_type not in OEB_DOCS:
3545 continue
3546@@ -215,12 +219,6 @@
3547 self.anchor_offset = buffer.tell()
3548 buffer.write('<body>')
3549 self.anchor_offset_kindle = buffer.tell()
3550- # CybookG3 'Start Reading' link
3551- if 'text' in self.oeb.guide:
3552- href = self.oeb.guide['text'].href
3553- buffer.write('<a ')
3554- self.serialize_href(href)
3555- buffer.write(' />')
3556 spine = [item for item in self.oeb.spine if item.linear]
3557 spine.extend([item for item in self.oeb.spine if not item.linear])
3558 for item in spine:
3559@@ -315,16 +313,20 @@
3560 buffer.seek(hoff)
3561 buffer.write('%010d' % ioff)
3562
3563+ # }}}
3564+
3565 class MobiWriter(object):
3566 COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')
3567
3568- def __init__(self, opts, compression=PALMDOC, imagemax=None,
3569- prefer_author_sort=False, write_page_breaks_after_item=True):
3570+ def __init__(self, opts,
3571+ write_page_breaks_after_item=True):
3572 self.opts = opts
3573 self.write_page_breaks_after_item = write_page_breaks_after_item
3574- self._compression = compression or UNCOMPRESSED
3575- self._imagemax = imagemax or OTHER_MAX_IMAGE_SIZE
3576- self._prefer_author_sort = prefer_author_sort
3577+ self._compression = UNCOMPRESSED if getattr(opts, 'dont_compress',
3578+ False) else PALMDOC
3579+ self._imagemax = (PALM_MAX_IMAGE_SIZE if getattr(opts,
3580+ 'rescale_images', False) else OTHER_MAX_IMAGE_SIZE)
3581+ self._prefer_author_sort = getattr(opts, 'prefer_author_sort', False)
3582 self._primary_index_record = None
3583 self._conforming_periodical_toc = False
3584 self._indexable = False
3585@@ -1325,6 +1327,8 @@
3586 except:
3587 self._oeb.logger.warn('Bad image file %r' % item.href)
3588 continue
3589+ finally:
3590+ item.unload_data_from_memory()
3591 self._records.append(data)
3592 if self._first_image_record is None:
3593 self._first_image_record = len(self._records)-1
3594
3595=== added directory 'src/calibre/ebooks/mobi/writer2'
3596=== added file 'src/calibre/ebooks/mobi/writer2/__init__.py'
3597--- src/calibre/ebooks/mobi/writer2/__init__.py 1970-01-01 00:00:00 +0000
3598+++ src/calibre/ebooks/mobi/writer2/__init__.py 2011-07-19 06:18:03 +0000
3599@@ -0,0 +1,15 @@
3600+#!/usr/bin/env python
3601+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
3602+from __future__ import (unicode_literals, division, absolute_import,
3603+ print_function)
3604+
3605+__license__ = 'GPL v3'
3606+__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
3607+__docformat__ = 'restructuredtext en'
3608+
3609+
3610+UNCOMPRESSED = 1
3611+PALMDOC = 2
3612+HUFFDIC = 17480
3613+PALM_MAX_IMAGE_SIZE = 63 * 1024
3614+
3615
3616=== added file 'src/calibre/ebooks/mobi/writer2/main.py'
3617--- src/calibre/ebooks/mobi/writer2/main.py 1970-01-01 00:00:00 +0000
3618+++ src/calibre/ebooks/mobi/writer2/main.py 2011-07-19 06:18:03 +0000
3619@@ -0,0 +1,579 @@
3620+#!/usr/bin/env python
3621+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
3622+from __future__ import (unicode_literals, division, absolute_import,
3623+ print_function)
3624+
3625+__license__ = 'GPL v3'
3626+__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
3627+__docformat__ = 'restructuredtext en'
3628+
3629+import re, random, time
3630+from cStringIO import StringIO
3631+from struct import pack
3632+
3633+from calibre.ebooks import normalize
3634+from calibre.ebooks.oeb.base import OEB_RASTER_IMAGES
3635+from calibre.ebooks.mobi.writer2.serializer import Serializer
3636+from calibre.ebooks.compression.palmdoc import compress_doc
3637+from calibre.utils.magick.draw import Image, save_cover_data_to, thumbnail
3638+from calibre.ebooks.mobi.langcodes import iana2mobi
3639+from calibre.utils.filenames import ascii_filename
3640+from calibre.ebooks.mobi.writer2 import PALMDOC, UNCOMPRESSED
3641+
3642+EXTH_CODES = {
3643+ 'creator': 100,
3644+ 'publisher': 101,
3645+ 'description': 103,
3646+ 'identifier': 104,
3647+ 'subject': 105,
3648+ 'pubdate': 106,
3649+ 'date': 106,
3650+ 'review': 107,
3651+ 'contributor': 108,
3652+ 'rights': 109,
3653+ 'type': 111,
3654+ 'source': 112,
3655+ 'title': 503,
3656+ }
3657+
3658+# Disabled as I don't care about uncrossable breaks
3659+WRITE_UNCROSSABLE_BREAKS = False
3660+
3661+RECORD_SIZE = 0x1000 # 4096
3662+
3663+IMAGE_MAX_SIZE = 10 * 1024 * 1024
3664+MAX_THUMB_SIZE = 16 * 1024
3665+MAX_THUMB_DIMEN = (180, 240)
3666+
3667+# Almost like the one for MS LIT, but not quite.
3668+DECINT_FORWARD = 0
3669+DECINT_BACKWARD = 1
3670+
3671+def decint(value, direction):
3672+ '''
3673+ Some parts of the Mobipocket format encode data as variable-width integers.
3674+ These integers are represented big-endian with 7 bits per byte in bits 1-7.
3675+ They may be either forward-encoded, in which case only the LSB has bit 8 set,
3676+ or backward-encoded, in which case only the MSB has bit 8 set.
3677+ For example, the number 0x11111 would be represented forward-encoded as:
3678+
3679+ 0x04 0x22 0x91
3680+
3681+ And backward-encoded as:
3682+
3683+ 0x84 0x22 0x11
3684+
3685+ This function encodes the integer ``value`` as a variable width integer and
3686+ returns the bytestring corresponding to it.
3687+ '''
3688+ # Encode vwi
3689+ byts = bytearray()
3690+ while True:
3691+ b = value & 0x7f
3692+ value >>= 7
3693+ byts.append(b)
3694+ if value == 0:
3695+ break
3696+ if direction == DECINT_FORWARD:
3697+ byts[0] |= 0x80
3698+ elif direction == DECINT_BACKWARD:
3699+ byts[-1] |= 0x80
3700+ return bytes(byts)
3701+
3702+def rescale_image(data, maxsizeb=IMAGE_MAX_SIZE, dimen=None):
3703+ '''
3704+ Convert image setting all transparent pixels to white and changing format
3705+ to JPEG. Ensure the resultant image has a byte size less than
3706+ maxsizeb.
3707+
3708+ If dimen is not None, generate a thumbnail of width=dimen, height=dimen
3709+
3710+ Returns the image as a bytestring
3711+ '''
3712+ if dimen is not None:
3713+ data = thumbnail(data, width=dimen, height=dimen,
3714+ compression_quality=90)[-1]
3715+ else:
3716+ # Replace transparent pixels with white pixels and convert to JPEG
3717+ data = save_cover_data_to(data, 'img.jpg', return_data=True)
3718+ if len(data) <= maxsizeb:
3719+ return data
3720+ orig_data = data
3721+ img = Image()
3722+ quality = 95
3723+
3724+ img.load(data)
3725+ while len(data) >= maxsizeb and quality >= 10:
3726+ quality -= 5
3727+ img.set_compression_quality(quality)
3728+ data = img.export('jpg')
3729+ if len(data) <= maxsizeb:
3730+ return data
3731+ orig_data = data
3732+
3733+ scale = 0.9
3734+ while len(data) >= maxsizeb and scale >= 0.05:
3735+ img = Image()
3736+ img.load(orig_data)
3737+ w, h = img.size
3738+ img.size = (int(scale*w), int(scale*h))
3739+ img.set_compression_quality(quality)
3740+ data = img.export('jpg')
3741+ scale -= 0.05
3742+ return data
3743+
3744+class MobiWriter(object):
3745+ COLLAPSE_RE = re.compile(r'[ \t\r\n\v]+')
3746+
3747+ def __init__(self, opts, write_page_breaks_after_item=True):
3748+ self.opts = opts
3749+ self.write_page_breaks_after_item = write_page_breaks_after_item
3750+ self.compression = UNCOMPRESSED if opts.dont_compress else PALMDOC
3751+ self.prefer_author_sort = opts.prefer_author_sort
3752+
3753+ def __call__(self, oeb, path_or_stream):
3754+ if hasattr(path_or_stream, 'write'):
3755+ return self.dump_stream(oeb, path_or_stream)
3756+ with open(path_or_stream, 'w+b') as stream:
3757+ return self.dump_stream(oeb, stream)
3758+
3759+ def write(self, *args):
3760+ for datum in args:
3761+ self.stream.write(datum)
3762+
3763+ def tell(self):
3764+ return self.stream.tell()
3765+
3766+ def dump_stream(self, oeb, stream):
3767+ self.oeb = oeb
3768+ self.stream = stream
3769+ self.records = [None]
3770+ self.generate_content()
3771+ self.generate_record0()
3772+ self.write_header()
3773+ self.write_content()
3774+
3775+ def generate_content(self):
3776+ self.map_image_names()
3777+ self.generate_text()
3778+ # Image records come after text records
3779+ self.generate_images()
3780+
3781+ def map_image_names(self):
3782+ '''
3783+ Map image names to record indices, ensuring that the masthead image if
3784+ present has index number 1.
3785+ '''
3786+ index = 1
3787+ self.images = images = {}
3788+ mh_href = None
3789+
3790+ if 'masthead' in self.oeb.guide:
3791+ mh_href = self.oeb.guide['masthead'].href
3792+ images[mh_href] = 1
3793+ index += 1
3794+
3795+ for item in self.oeb.manifest.values():
3796+ if item.media_type in OEB_RASTER_IMAGES:
3797+ if item.href == mh_href: continue
3798+ images[item.href] = index
3799+ index += 1
3800+
3801+ def generate_images(self):
3802+ self.oeb.logger.info('Serializing images...')
3803+ images = [(index, href) for href, index in self.images.iteritems()]
3804+ images.sort()
3805+ self.first_image_record = None
3806+ for _, href in images:
3807+ item = self.oeb.manifest.hrefs[href]
3808+ try:
3809+ data = rescale_image(item.data)
3810+ except:
3811+ self.oeb.logger.warn('Bad image file %r' % item.href)
3812+ continue
3813+ finally:
3814+ item.unload_data_from_memory()
3815+ self.records.append(data)
3816+ if self.first_image_record is None:
3817+ self.first_image_record = len(self.records) - 1
3818+
3819+ def generate_text(self):
3820+ self.oeb.logger.info('Serializing markup content...')
3821+ serializer = Serializer(self.oeb, self.images,
3822+ write_page_breaks_after_item=self.write_page_breaks_after_item)
3823+ text = serializer()
3824+ breaks = serializer.breaks
3825+ self.anchor_offset_kindle = serializer.anchor_offset_kindle
3826+ self.id_offsets = serializer.id_offsets
3827+ self.content_length = len(text)
3828+ self.text_length = len(text)
3829+ text = StringIO(text)
3830+ buf = []
3831+ nrecords = 0
3832+ offset = 0
3833+
3834+ if self.compression != UNCOMPRESSED:
3835+ self.oeb.logger.info(' Compressing markup content...')
3836+ data, overlap = self.read_text_record(text)
3837+
3838+ while len(data) > 0:
3839+ if self.compression == PALMDOC:
3840+ data = compress_doc(data)
3841+ record = StringIO()
3842+ record.write(data)
3843+
3844+ self.records.append(record.getvalue())
3845+ buf.append(self.records[-1])
3846+ nrecords += 1
3847+ offset += RECORD_SIZE
3848+ data, overlap = self.read_text_record(text)
3849+
3850+        # Write information about the multibyte character overlap, if any
3851+ record.write(overlap)
3852+ record.write(pack(b'>B', len(overlap)))
3853+
3854+ # Write information about uncrossable breaks (non linear items in
3855+ # the spine)
3856+ if WRITE_UNCROSSABLE_BREAKS:
3857+ nextra = 0
3858+ pbreak = 0
3859+ running = offset
3860+
3861+ # Write information about every uncrossable break that occurs in
3862+ # the next record.
3863+ while breaks and (breaks[0] - offset) < RECORD_SIZE:
3864+ pbreak = (breaks.pop(0) - running) >> 3
3865+ encoded = decint(pbreak, DECINT_FORWARD)
3866+ record.write(encoded)
3867+ running += pbreak << 3
3868+ nextra += len(encoded)
3869+ lsize = 1
3870+ while True:
3871+ size = decint(nextra + lsize, DECINT_BACKWARD)
3872+ if len(size) == lsize:
3873+ break
3874+ lsize += 1
3875+ record.write(size)
3876+
3877+ self.text_nrecords = nrecords + 1
3878+
3879+ def read_text_record(self, text):
3880+ '''
3881+ Return a Palmdoc record of size RECORD_SIZE from the text file object.
3882+ In case the record ends in the middle of a multibyte character return
3883+ the overlap as well.
3884+
3885+ Returns data, overlap: where both are byte strings. overlap is the
3886+ extra bytes needed to complete the truncated multibyte character.
3887+ '''
3888+ opos = text.tell()
3889+ text.seek(0, 2)
3890+ # npos is the position of the next record
3891+ npos = min((opos + RECORD_SIZE, text.tell()))
3892+ # Number of bytes from the next record needed to complete the last
3893+ # character in this record
3894+ extra = 0
3895+
3896+ last = b''
3897+ while not last.decode('utf-8', 'ignore'):
3898+ # last contains no valid utf-8 characters
3899+ size = len(last) + 1
3900+ text.seek(npos - size)
3901+ last = text.read(size)
3902+
3903+ # last now has one valid utf-8 char and possibly some bytes that belong
3904+ # to a truncated char
3905+
3906+ try:
3907+ last.decode('utf-8', 'strict')
3908+ except UnicodeDecodeError:
3909+ # There are some truncated bytes in last
3910+ prev = len(last)
3911+ while True:
3912+ text.seek(npos - prev)
3913+ last = text.read(len(last) + 1)
3914+ try:
3915+ last.decode('utf-8')
3916+ except UnicodeDecodeError:
3917+ pass
3918+ else:
3919+ break
3920+ extra = len(last) - prev
3921+
3922+ text.seek(opos)
3923+ data = text.read(RECORD_SIZE)
3924+ overlap = text.read(extra)
3925+ text.seek(npos)
3926+
3927+ return data, overlap
3928+
3929+ def generate_end_records(self):
3930+ self.flis_number = len(self.records)
3931+ self.records.append('\xE9\x8E\x0D\x0A')
3932+
3933+ def generate_record0(self): # {{{
3934+ metadata = self.oeb.metadata
3935+ exth = self.build_exth()
3936+ last_content_record = len(self.records) - 1
3937+
3938+ self.generate_end_records()
3939+
3940+ record0 = StringIO()
3941+ # The PalmDOC Header
3942+ record0.write(pack(b'>HHIHHHH', self.compression, 0,
3943+ self.text_length,
3944+ self.text_nrecords-1, RECORD_SIZE, 0, 0)) # 0 - 15 (0x0 - 0xf)
3945+ uid = random.randint(0, 0xffffffff)
3946+ title = normalize(unicode(metadata.title[0])).encode('utf-8')
3947+ # The MOBI Header
3948+
3949+ # 0x0 - 0x3
3950+ record0.write(b'MOBI')
3951+
3952+ # 0x4 - 0x7 : Length of header
3953+ # 0x8 - 0x11 : MOBI type
3954+ # type meaning
3955+ # 0x002 MOBI book (chapter - chapter navigation)
3956+ # 0x101 News - Hierarchical navigation with sections and articles
3957+ # 0x102 News feed - Flat navigation
3958+ # 0x103 News magazine - same as 0x101
3959+ # 0xC - 0xF : Text encoding (65001 is utf-8)
3960+ # 0x10 - 0x13 : UID
3961+ # 0x14 - 0x17 : Generator version
3962+
3963+ record0.write(pack(b'>IIIII',
3964+ 0xe8, 0x002, 65001, uid, 6))
3965+
3966+ # 0x18 - 0x1f : Unknown
3967+ record0.write(b'\xff' * 8)
3968+
3969+
3970+ # 0x20 - 0x23 : Secondary index record
3971+ record0.write(pack(b'>I', 0xffffffff))
3972+
3973+ # 0x24 - 0x3f : Unknown
3974+ record0.write(b'\xff' * 28)
3975+
3976+ # 0x40 - 0x43 : Offset of first non-text record
3977+ record0.write(pack(b'>I',
3978+ self.text_nrecords + 1))
3979+
3980+ # 0x44 - 0x4b : title offset, title length
3981+ record0.write(pack(b'>II',
3982+ 0xe8 + 16 + len(exth), len(title)))
3983+
3984+ # 0x4c - 0x4f : Language specifier
3985+ record0.write(iana2mobi(
3986+ str(metadata.language[0])))
3987+
3988+ # 0x50 - 0x57 : Unknown
3989+ record0.write(b'\0' * 8)
3990+
3991+ # 0x58 - 0x5b : Format version
3992+ # 0x5c - 0x5f : First image record number
3993+ record0.write(pack(b'>II',
3994+ 6, self.first_image_record if self.first_image_record else 0))
3995+
3996+ # 0x60 - 0x63 : First HUFF/CDIC record number
3997+ # 0x64 - 0x67 : Number of HUFF/CDIC records
3998+ # 0x68 - 0x6b : First DATP record number
3999+ # 0x6c - 0x6f : Number of DATP records
4000+ record0.write(b'\0' * 16)
4001+
4002+ # 0x70 - 0x73 : EXTH flags
4003+ record0.write(pack(b'>I', 0x50))
4004+
4005+ # 0x74 - 0x93 : Unknown
4006+ record0.write(b'\0' * 32)
4007+
4008+ # 0x94 - 0x97 : DRM offset
4009+ # 0x98 - 0x9b : DRM count
4010+ # 0x9c - 0x9f : DRM size
4011+ # 0xa0 - 0xa3 : DRM flags
4012+ record0.write(pack(b'>IIII',
4013+ 0xffffffff, 0xffffffff, 0, 0))
4014+
4015+
4016+ # 0xa4 - 0xaf : Unknown
4017+ record0.write(b'\0'*12)
4018+
4019+ # 0xb0 - 0xb1 : First content record number
4020+ # 0xb2 - 0xb3 : last content record number
4021+ # (Includes Image, DATP, HUFF, DRM)
4022+ record0.write(pack(b'>HH', 1, last_content_record))
4023+
4024+ # 0xb4 - 0xb7 : Unknown
4025+ record0.write(b'\0\0\0\x01')
4026+
4027+ # 0xb8 - 0xbb : FCIS record number
4028+ record0.write(pack(b'>I', 0xffffffff))
4029+
4030+ # 0xbc - 0xbf : Unknown (FCIS record count?)
4031+ record0.write(pack(b'>I', 0xffffffff))
4032+
4033+ # 0xc0 - 0xc3 : FLIS record number
4034+ record0.write(pack(b'>I', 0xffffffff))
4035+
4036+ # 0xc4 - 0xc7 : Unknown (FLIS record count?)
4037+ record0.write(pack(b'>I', 1))
4038+
4039+ # 0xc8 - 0xcf : Unknown
4040+ record0.write(b'\0'*8)
4041+
4042+ # 0xd0 - 0xdf : Unknown
4043+ record0.write(pack(b'>IIII', 0xffffffff, 0, 0xffffffff, 0xffffffff))
4044+
4045+ # 0xe0 - 0xe3 : Extra record data
4046+ # Extra record data flags:
4047+ # - 0x1: <extra multibyte bytes><size> (?)
4048+ # - 0x2: <TBS indexing description of this HTML record><size> GR
4049+ # - 0x4: <uncrossable breaks><size>
4050+ # GR: Use 7 for indexed files, 5 for unindexed
4051+ # Setting bit 2 (0x2) disables <guide><reference type="start"> functionality
4052+
4053+ extra_data_flags = 0b1 # Has multibyte overlap bytes
4054+ if WRITE_UNCROSSABLE_BREAKS:
4055+ extra_data_flags |= 0b100
4056+ record0.write(pack(b'>I', extra_data_flags))
4057+
4058+ # 0xe4 - 0xe7 : Primary index record
4059+ record0.write(pack(b'>I', 0xffffffff))
4060+
4061+ record0.write(exth)
4062+ record0.write(title)
4063+ record0 = record0.getvalue()
4064+ # Add some buffer so that Amazon can add encryption information if this
4065+ # MOBI is submitted for publication
4066+ record0 += (b'\0' * (1024*8))
4067+ self.records[0] = record0
4068+ # }}}
4069+
4070+ def build_exth(self): # {{{
4071+ oeb = self.oeb
4072+ exth = StringIO()
4073+ nrecs = 0
4074+ for term in oeb.metadata:
4075+ if term not in EXTH_CODES: continue
4076+ code = EXTH_CODES[term]
4077+ items = oeb.metadata[term]
4078+ if term == 'creator':
4079+ if self.prefer_author_sort:
4080+ creators = [normalize(unicode(c.file_as or c)) for c in items]
4081+ else:
4082+ creators = [normalize(unicode(c)) for c in items]
4083+ items = ['; '.join(creators)]
4084+ for item in items:
4085+ data = self.COLLAPSE_RE.sub(' ', normalize(unicode(item)))
4086+ if term == 'identifier':
4087+ if data.lower().startswith('urn:isbn:'):
4088+ data = data[9:]
4089+ elif item.scheme.lower() == 'isbn':
4090+ pass
4091+ else:
4092+ continue
4093+ data = data.encode('utf-8')
4094+ exth.write(pack(b'>II', code, len(data) + 8))
4095+ exth.write(data)
4096+ nrecs += 1
4097+ if term == 'rights' :
4098+ try:
4099+ rights = normalize(unicode(oeb.metadata.rights[0])).encode('utf-8')
4100+ except:
4101+ rights = b'Unknown'
4102+ exth.write(pack(b'>II', EXTH_CODES['rights'], len(rights) + 8))
4103+ exth.write(rights)
4104+ nrecs += 1
4105+
4106+ # Write UUID as ASIN
4107+ uuid = None
4108+ from calibre.ebooks.oeb.base import OPF
4109+ for x in oeb.metadata['identifier']:
4110+ if (x.get(OPF('scheme'), None).lower() == 'uuid' or
4111+ unicode(x).startswith('urn:uuid:')):
4112+ uuid = unicode(x).split(':')[-1]
4113+ break
4114+ if uuid is None:
4115+ from uuid import uuid4
4116+ uuid = str(uuid4())
4117+
4118+ if isinstance(uuid, unicode):
4119+ uuid = uuid.encode('utf-8')
4120+ exth.write(pack(b'>II', 113, len(uuid) + 8))
4121+ exth.write(uuid)
4122+ nrecs += 1
4123+
4124+ # Write cdetype
4125+ if not self.opts.mobi_periodical:
4126+ data = b'EBOK'
4127+ exth.write(pack(b'>II', 501, len(data)+8))
4128+ exth.write(data)
4129+ nrecs += 1
4130+
4131+ # Add a publication date entry
4132+ if oeb.metadata['date'] != [] :
4133+ datestr = str(oeb.metadata['date'][0])
4134+ elif oeb.metadata['timestamp'] != [] :
4135+ datestr = str(oeb.metadata['timestamp'][0])
4136+
4137+ if datestr is not None:
4138+ exth.write(pack(b'>II', EXTH_CODES['pubdate'], len(datestr) + 8))
4139+ exth.write(datestr)
4140+ nrecs += 1
4141+ else:
4142+ raise NotImplementedError("missing date or timestamp needed for mobi_periodical")
4143+
4144+ if (oeb.metadata.cover and
4145+ unicode(oeb.metadata.cover[0]) in oeb.manifest.ids):
4146+ id = unicode(oeb.metadata.cover[0])
4147+ item = oeb.manifest.ids[id]
4148+ href = item.href
4149+ if href in self.images:
4150+ index = self.images[href] - 1
4151+ exth.write(pack(b'>III', 0xc9, 0x0c, index))
4152+ exth.write(pack(b'>III', 0xcb, 0x0c, 0))
4153+ nrecs += 2
4154+ index = self.add_thumbnail(item)
4155+ if index is not None:
4156+ exth.write(pack(b'>III', 0xca, 0x0c, index - 1))
4157+ nrecs += 1
4158+
4159+ exth = exth.getvalue()
4160+ trail = len(exth) % 4
4161+ pad = b'\0' * (4 - trail) # Always pad w/ at least 1 byte
4162+ exth = [b'EXTH', pack(b'>II', len(exth) + 12, nrecs), exth, pad]
4163+ return b''.join(exth)
4164+ # }}}
4165+
4166+ def add_thumbnail(self, item):
4167+ try:
4168+ data = rescale_image(item.data, dimen=MAX_THUMB_DIMEN,
4169+ maxsizeb=MAX_THUMB_SIZE)
4170+ except IOError:
4171+ self.oeb.logger.warn('Bad image file %r' % item.href)
4172+ return None
4173+ manifest = self.oeb.manifest
4174+ id, href = manifest.generate('thumbnail', 'thumbnail.jpeg')
4175+ manifest.add(id, href, 'image/jpeg', data=data)
4176+ index = len(self.images) + 1
4177+ self.images[href] = index
4178+ self.records.append(data)
4179+ return index
4180+
4181+ def write_header(self):
4182+ title = ascii_filename(unicode(self.oeb.metadata.title[0]))
4183+ title = title + (b'\0' * (32 - len(title)))
4184+ now = int(time.time())
4185+ nrecords = len(self.records)
4186+ self.write(title, pack(b'>HHIIIIII', 0, 0, now, now, 0, 0, 0, 0),
4187+ b'BOOK', b'MOBI', pack(b'>IIH', nrecords, 0, nrecords))
4188+ offset = self.tell() + (8 * nrecords) + 2
4189+ for i, record in enumerate(self.records):
4190+ self.write(pack(b'>I', offset), b'\0', pack(b'>I', 2*i)[1:])
4191+ offset += len(record)
4192+ self.write(b'\0\0')
4193+
4194+ def write_content(self):
4195+ for record in self.records:
4196+ self.write(record)
4197+
4198+
4199
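
The decint docstring above describes the variable-width integers used throughout the MOBI record headers. A standalone sketch that follows the byte order the docstring gives (most significant 7-bit group first, bit 8 set on the last byte for forward encoding and on the first byte for backward encoding); encode_vwi is an illustrative name, and the asserts reproduce the documented example values:

    def encode_vwi(value, forward=True):
        # Split the value into 7-bit groups, most significant group first.
        byts = bytearray()
        while True:
            byts.insert(0, value & 0x7f)
            value >>= 7
            if value == 0:
                break
        # Forward encoding marks the least significant (last) byte with bit 8,
        # backward encoding marks the most significant (first) byte.
        if forward:
            byts[-1] |= 0x80
        else:
            byts[0] |= 0x80
        return bytes(byts)

    assert encode_vwi(0x11111, forward=True)  == b'\x04\x22\x91'
    assert encode_vwi(0x11111, forward=False) == b'\x84\x22\x11'
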
4200=== added file 'src/calibre/ebooks/mobi/writer2/serializer.py'
4201--- src/calibre/ebooks/mobi/writer2/serializer.py 1970-01-01 00:00:00 +0000
4202+++ src/calibre/ebooks/mobi/writer2/serializer.py 2011-07-19 06:18:03 +0000
4203@@ -0,0 +1,246 @@
4204+#!/usr/bin/env python
4205+# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
4206+from __future__ import (unicode_literals, division, absolute_import,
4207+ print_function)
4208+
4209+__license__ = 'GPL v3'
4210+__copyright__ = '2011, Kovid Goyal <kovid@kovidgoyal.net>'
4211+__docformat__ = 'restructuredtext en'
4212+
4213+from calibre.ebooks.oeb.base import (OEB_DOCS, XHTML, XHTML_NS, XML_NS,
4214+ namespace, prefixname, urlnormalize)
4215+from calibre.ebooks.mobi.mobiml import MBP_NS
4216+
4217+from collections import defaultdict
4218+from urlparse import urldefrag
4219+from cStringIO import StringIO
4220+
4221+
4222+class Serializer(object):
4223+ NSRMAP = {'': None, XML_NS: 'xml', XHTML_NS: '', MBP_NS: 'mbp'}
4224+
4225+ def __init__(self, oeb, images, write_page_breaks_after_item=True):
4226+ '''
4227+ Write all the HTML markup in oeb into a single in memory buffer
4228+ containing a single html document with links replaced by offsets into
4229+ the buffer.
4230+
4231+ :param oeb: OEBBook object that encapsulates the document to be
4232+ processed.
4233+
4234+ :param images: Mapping of image hrefs (urlnormalized) to image record
4235+ indices.
4236+
4237+ :param write_page_breaks_after_item: If True a MOBIpocket pagebreak tag
4238+ is written after every element of the spine in ``oeb``.
4239+ '''
4240+ self.oeb = oeb
4241+ self.images = images
4242+ self.logger = oeb.logger
4243+ self.write_page_breaks_after_item = write_page_breaks_after_item
4244+
4245+ # Mapping of hrefs (urlnormalized) to the offset in the buffer where
4246+ # the resource pointed to by the href lives. Used at the end to fill in
4247+ # the correct values into all filepos="..." links.
4248+ self.id_offsets = {}
4249+
4250+ # Mapping of hrefs (urlnormalized) to a list of offsets into the buffer
4251+ # where filepos="..." elements are written corresponding to links that
4252+ # point to the href. This is used at the end to fill in the correct values.
4253+ self.href_offsets = defaultdict(list)
4254+
4255+ # List of offsets in the buffer of non linear items in the spine. These
4256+ # become uncrossable breaks in the MOBI
4257+ self.breaks = []
4258+
4259+ def __call__(self):
4260+ '''
4261+ Return the document serialized as a single UTF-8 encoded bytestring.
4262+ '''
4263+ buf = self.buf = StringIO()
4264+ buf.write(b'<html>')
4265+ self.serialize_head()
4266+ self.serialize_body()
4267+ buf.write(b'</html>')
4268+ self.fixup_links()
4269+ return buf.getvalue()
4270+
4271+ def serialize_head(self):
4272+ buf = self.buf
4273+ buf.write(b'<head>')
4274+ if len(self.oeb.guide) > 0:
4275+ self.serialize_guide()
4276+ buf.write(b'</head>')
4277+
4278+ def serialize_guide(self):
4279+ '''
4280+ The Kindle decides where to open a book based on the presence of
4281+ an item in the guide that looks like
4282+ <reference type="text" title="Start" href="chapter-one.xhtml"/>
4283+
4284+ Similarly an item with type="toc" controls where the Goto Table of
4285+ Contents operation on the kindle goes.
4286+ '''
4287+
4288+ buf = self.buf
4289+ hrefs = self.oeb.manifest.hrefs
4290+ buf.write(b'<guide>')
4291+ for ref in self.oeb.guide.values():
4292+ path = urldefrag(ref.href)[0]
4293+ if path not in hrefs or hrefs[path].media_type not in OEB_DOCS:
4294+ continue
4295+
4296+ buf.write(b'<reference type="')
4297+ if ref.type.startswith('other.') :
4298+ self.serialize_text(ref.type.replace('other.',''), quot=True)
4299+ else:
4300+ self.serialize_text(ref.type, quot=True)
4301+ buf.write(b'" ')
4302+ if ref.title is not None:
4303+ buf.write(b'title="')
4304+ self.serialize_text(ref.title, quot=True)
4305+ buf.write(b'" ')
4306+ self.serialize_href(ref.href)
4307+ # Space required or won't work, I kid you not
4308+ buf.write(b' />')
4309+
4310+ buf.write(b'</guide>')
4311+
4312+ def serialize_href(self, href, base=None):
4313+ '''
4314+ Serialize the href attribute of an <a> or <reference> tag. It is
4315+ serialized as filepos="000000000" and a pointer to its location is
4316+ stored in self.href_offsets so that the correct value can be filled in
4317+ at the end.
4318+ '''
4319+ hrefs = self.oeb.manifest.hrefs
4320+ path, frag = urldefrag(urlnormalize(href))
4321+ if path and base:
4322+ path = base.abshref(path)
4323+ if path and path not in hrefs:
4324+ return False
4325+ buf = self.buf
4326+ item = hrefs[path] if path else None
4327+ if item and item.spine_position is None:
4328+ return False
4329+ path = item.href if item else base.href
4330+ href = '#'.join((path, frag)) if frag else path
4331+ buf.write(b'filepos=')
4332+ self.href_offsets[href].append(buf.tell())
4333+ buf.write(b'0000000000')
4334+ return True
4335+
4336+ def serialize_body(self):
4337+ '''
4338+ Serialize all items in the spine of the document. Non linear items are
4339+ moved to the end.
4340+ '''
4341+ buf = self.buf
4342+ self.anchor_offset = buf.tell()
4343+ buf.write(b'<body>')
4344+ self.anchor_offset_kindle = buf.tell()
4345+ spine = [item for item in self.oeb.spine if item.linear]
4346+ spine.extend([item for item in self.oeb.spine if not item.linear])
4347+ for item in spine:
4348+ self.serialize_item(item)
4349+ buf.write(b'</body>')
4350+
4351+ def serialize_item(self, item):
4352+ '''
4353+ Serialize an individual item from the spine of the input document.
4354+ A reference to this item is stored in self.href_offsets
4355+ '''
4356+ buf = self.buf
4357+ if not item.linear:
4358+ self.breaks.append(buf.tell() - 1)
4359+ self.id_offsets[urlnormalize(item.href)] = buf.tell()
4360+ # Kindle periodical articles are contained in a <div> tag
4361+ buf.write(b'<div>')
4362+ for elem in item.data.find(XHTML('body')):
4363+ self.serialize_elem(elem, item)
4364+ # Kindle periodical article end marker
4365+ buf.write(b'<div></div>')
4366+ if self.write_page_breaks_after_item:
4367+ buf.write(b'<mbp:pagebreak/>')
4368+ buf.write(b'</div>')
4369+ self.anchor_offset = None
4370+
4371+ def serialize_elem(self, elem, item, nsrmap=NSRMAP):
4372+ buf = self.buf
4373+ if not isinstance(elem.tag, basestring) \
4374+ or namespace(elem.tag) not in nsrmap:
4375+ return
4376+ tag = prefixname(elem.tag, nsrmap)
4377+ # Earlier pipeline stages take care of @name; only @id needs handling here
4378+ id_ = elem.attrib.pop('id', None)
4379+ if id_:
4380+ href = '#'.join((item.href, id_))
4381+ offset = self.anchor_offset or buf.tell()
4382+ self.id_offsets[urlnormalize(href)] = offset
4383+ if self.anchor_offset is not None and \
4384+ tag == 'a' and not elem.attrib and \
4385+ not len(elem) and not elem.text:
4386+ return
4387+ self.anchor_offset = buf.tell()
4388+ buf.write(b'<')
4389+ buf.write(tag.encode('utf-8'))
4390+ if elem.attrib:
4391+ for attr, val in elem.attrib.items():
4392+ if namespace(attr) not in nsrmap:
4393+ continue
4394+ attr = prefixname(attr, nsrmap)
4395+ buf.write(b' ')
4396+ if attr == 'href':
4397+ if self.serialize_href(val, item):
4398+ continue
4399+ elif attr == 'src':
4400+ href = urlnormalize(item.abshref(val))
4401+ if href in self.images:
4402+ index = self.images[href]
4403+ buf.write(b'recindex="%05d"' % index)
4404+ continue
4405+ buf.write(attr.encode('utf-8'))
4406+ buf.write(b'="')
4407+ self.serialize_text(val, quot=True)
4408+ buf.write(b'"')
4409+ buf.write(b'>')
4410+ if elem.text or len(elem) > 0:
4411+ if elem.text:
4412+ self.anchor_offset = None
4413+ self.serialize_text(elem.text)
4414+ for child in elem:
4415+ self.serialize_elem(child, item)
4416+ if child.tail:
4417+ self.anchor_offset = None
4418+ self.serialize_text(child.tail)
4419+ buf.write(b'</%s>' % tag.encode('utf-8'))
4420+
4421+ def serialize_text(self, text, quot=False):
4422+ text = text.replace('&', '&amp;')
4423+ text = text.replace('<', '&lt;')
4424+ text = text.replace('>', '&gt;')
4425+ text = text.replace(u'\u00AD', '') # Soft-hyphen
4426+ if quot:
4427+ text = text.replace('"', '&quot;')
4428+ self.buf.write(text.encode('utf-8'))
4429+
4430+ def fixup_links(self):
4431+ '''
4432+ Fill in the correct values for all filepos="..." links with the offsets
4433+ of the linked-to content (as stored in id_offsets).
4434+ '''
4435+ buf = self.buf
4436+ id_offsets = self.id_offsets
4437+ for href, hoffs in self.href_offsets.items():
4438+ # Iterate over all filepos items
4439+ if href not in id_offsets:
4440+ self.logger.warn('Hyperlink target %r not found' % href)
4441+ # Fall back to linking to the top of the document rather than dropping the link
4442+ href, _ = urldefrag(href)
4443+ if href in self.id_offsets:
4444+ ioff = self.id_offsets[href]
4445+ for hoff in hoffs:
4446+ buf.seek(hoff)
4447+ buf.write(b'%010d' % ioff)
4448+
4449+
4450
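The filepos mechanism implemented above works by reserving a fixed-width placeholder wherever a link is written, recording the placeholder's position in href_offsets, and then, once every target's offset is known, seeking back and overwriting each placeholder in place. A minimal standalone sketch of that technique, assuming an in-memory bytes buffer and hypothetical helper names rather than the serializer's actual API:

from collections import defaultdict
from io import BytesIO

buf = BytesIO()
href_offsets = defaultdict(list)   # href -> positions of filepos placeholders
id_offsets = {}                    # href -> byte offset of the link target

def write_link(href):
    # Reserve a fixed-width placeholder so it can be overwritten in place later.
    buf.write(b'filepos=')
    href_offsets[href].append(buf.tell())
    buf.write(b'0000000000')

def write_anchor(href):
    # Record where the target content begins.
    id_offsets[href] = buf.tell()

write_link('chapter-one.xhtml')            # link written before its target exists
buf.write(b'<p>intervening content</p>')
write_anchor('chapter-one.xhtml')
buf.write(b'<p>chapter one</p>')

# Fix-up pass: overwrite each placeholder with the real offset, zero padded to
# the same width so no byte positions shift.
for href, positions in href_offsets.items():
    target = id_offsets.get(href)
    if target is None:
        continue
    for pos in positions:
        buf.seek(pos)
        buf.write(b'%010d' % target)

print(buf.getvalue())
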
4451=== modified file 'src/calibre/ebooks/oeb/base.py'
4452--- src/calibre/ebooks/oeb/base.py 2011-06-18 16:00:57 +0000
4453+++ src/calibre/ebooks/oeb/base.py 2011-07-19 06:18:03 +0000
4454@@ -1180,8 +1180,9 @@
4455 if memory is None:
4456 from calibre.ptempfile import PersistentTemporaryFile
4457 pt = PersistentTemporaryFile(suffix='_oeb_base_mem_unloader.img')
4458- pt.write(self._data)
4459- pt.close()
4460+ with pt:
4461+ pt.write(self._data)
4462+ self.oeb._temp_files.append(pt.name)
4463 def loader(*args):
4464 with open(pt.name, 'rb') as f:
4465 ans = f.read()
4466@@ -1196,8 +1197,6 @@
4467 self._loader = loader2
4468 self._data = None
4469
4470-
4471-
4472 def __str__(self):
4473 data = self.data
4474 if isinstance(data, etree._Element):
4475@@ -1913,6 +1912,14 @@
4476 self.toc = TOC()
4477 self.pages = PageList()
4478 self.auto_generated_toc = True
4479+ self._temp_files = []
4480+
4481+ def clean_temp_files(self):
4482+ for path in self._temp_files:
4483+ try:
4484+ os.remove(path)
4485+ except:
4486+ pass
4487
4488 @classmethod
4489 def generate(cls, opts):
4490
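The base.py hunk above unloads image data to a persistent temporary file, remembers the file's path on the OEB container in _temp_files, and removes every remembered file in clean_temp_files(). A small sketch of that pattern in isolation, using the standard tempfile module and a hypothetical Container class in place of calibre's actual objects:

import os
import tempfile

class Container:
    def __init__(self):
        self._temp_files = []

    def unload(self, data):
        # Write the data to a named temporary file and keep only a loader
        # closure plus the file's path, so the bytes can leave memory.
        fd, path = tempfile.mkstemp(suffix='_oeb_base_mem_unloader.img')
        with os.fdopen(fd, 'wb') as f:
            f.write(data)
        self._temp_files.append(path)

        def loader():
            with open(path, 'rb') as f:
                return f.read()
        return loader

    def clean_temp_files(self):
        # Best-effort removal of everything unloaded earlier.
        for path in self._temp_files:
            try:
                os.remove(path)
            except OSError:
                pass

c = Container()
load = c.unload(b'image bytes')
assert load() == b'image bytes'
c.clean_temp_files()
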
4491=== modified file 'src/calibre/ebooks/oeb/iterator.py'
4492--- src/calibre/ebooks/oeb/iterator.py 2011-06-02 18:42:59 +0000
4493+++ src/calibre/ebooks/oeb/iterator.py 2011-07-19 06:18:03 +0000
4494@@ -92,7 +92,7 @@
4495 self.config = DynamicConfig(name='iterator')
4496 ext = os.path.splitext(pathtoebook)[1].replace('.', '').lower()
4497 ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
4498- self.ebook_ext = ext
4499+ self.ebook_ext = ext.replace('original_', '')
4500
4501 def search(self, text, index, backwards=False):
4502 text = text.lower()
4503
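The iterator.py change strips the ORIGINAL_ prefix that calibre adds when it keeps a pre-conversion copy of a book, so viewing such a file uses the underlying format's machinery. A tiny illustration with a hypothetical path:

import os
import re

pathtoebook = '/tmp/My Book.original_epub'   # hypothetical saved original format
ext = os.path.splitext(pathtoebook)[1].replace('.', '').lower()
ext = re.sub(r'(x{0,1})htm(l{0,1})', 'html', ext)
ebook_ext = ext.replace('original_', '')
assert ebook_ext == 'epub'
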
4504=== modified file 'src/calibre/ebooks/oeb/transforms/flatcss.py'
4505--- src/calibre/ebooks/oeb/transforms/flatcss.py 2011-02-24 16:18:10 +0000
4506+++ src/calibre/ebooks/oeb/transforms/flatcss.py 2011-07-19 06:18:03 +0000
4507@@ -318,7 +318,8 @@
4508 for edge in ('top', 'bottom'):
4509 cssdict['%s-%s'%(prop, edge)] = '0pt'
4510 if self.context.insert_blank_line:
4511- cssdict['margin-top'] = cssdict['margin-bottom'] = '0.5em'
4512+ cssdict['margin-top'] = cssdict['margin-bottom'] = \
4513+ '%fem'%self.context.insert_blank_line_size
4514 if self.context.remove_paragraph_spacing:
4515 cssdict['text-indent'] = "%1.1fem" % self.context.remove_paragraph_spacing_indent_size
4516
4517
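With the flatcss.py change, the blank-line height between paragraphs comes from the new insert_blank_line_size conversion option instead of a fixed 0.5em. A quick illustration with a hypothetical option value:

insert_blank_line_size = 0.75   # hypothetical value of the new option, in em
cssdict = {}
cssdict['margin-top'] = cssdict['margin-bottom'] = '%fem' % insert_blank_line_size
print(cssdict['margin-top'])    # 0.750000em
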
4518=== modified file 'src/calibre/ebooks/oeb/transforms/guide.py'
4519--- src/calibre/ebooks/oeb/transforms/guide.py 2010-01-29 01:32:52 +0000
4520+++ src/calibre/ebooks/oeb/transforms/guide.py 2011-07-19 06:18:03 +0000
4521@@ -36,5 +36,8 @@
4522 href = urldefrag(self.oeb.guide[x].href)[0]
4523 if x.lower() not in ('cover', 'titlepage', 'masthead', 'toc',
4524 'title-page', 'copyright-page', 'start'):
4525+ item = self.oeb.guide[x]
4526+ if item.title and item.title.lower() == 'start':
4527+ continue
4528 self.oeb.guide.remove(x)
4529
4530
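The guide.py hunk preserves any guide reference whose title is 'Start', even when its type is not one of the recognized special types, since the Kindle uses such an entry as the book's opening position. A schematic version of that filter, with the guide modeled as a plain dict rather than calibre's guide object:

guide = {                                   # hypothetical guide entries
    'other.intro': {'title': 'Start', 'href': 'intro.xhtml'},
    'other.notes': {'title': 'Notes', 'href': 'notes.xhtml'},
}
keep_types = ('cover', 'titlepage', 'masthead', 'toc',
              'title-page', 'copyright-page', 'start')

for x in list(guide):
    if x.lower() not in keep_types:
        item = guide[x]
        if item['title'] and item['title'].lower() == 'start':
            continue                        # keep the Start entry
        del guide[x]

print(sorted(guide))                        # ['other.intro']
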
4531=== modified file 'src/calibre/ebooks/oeb/transforms/htmltoc.py'
4532--- src/calibre/ebooks/oeb/transforms/htmltoc.py 2010-09-02 13:48:45 +0000
4533+++ src/calibre/ebooks/oeb/transforms/htmltoc.py 2011-07-19 06:18:03 +0000
4534@@ -45,9 +45,10 @@
4535 }
4536
4537 class HTMLTOCAdder(object):
4538- def __init__(self, title=None, style='nested'):
4539+ def __init__(self, title=None, style='nested', position='end'):
4540 self.title = title
4541 self.style = style
4542+ self.position = position
4543
4544 @classmethod
4545 def config(cls, cfg):
4546@@ -98,7 +99,10 @@
4547 self.add_toc_level(body, oeb.toc)
4548 id, href = oeb.manifest.generate('contents', 'contents.xhtml')
4549 item = oeb.manifest.add(id, href, XHTML_MIME, data=contents)
4550- oeb.spine.add(item, linear=False)
4551+ if self.position == 'end':
4552+ oeb.spine.add(item, linear=False)
4553+ else:
4554+ oeb.spine.insert(0, item, linear=True)
4555 oeb.guide.add('toc', 'Table of Contents', href)
4556
4557 def add_toc_level(self, elem, toc):
4558
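The new position argument to HTMLTOCAdder controls whether the generated inline table of contents goes at the end of the spine (the old behaviour, as a non-linear item) or at the very beginning (as a linear item), likely driven by the new mobi_toc_at_start option added elsewhere in this branch. The spine manipulation reduces to the following, with a plain list standing in for the OEB spine:

spine = ['chapter1.xhtml', 'chapter2.xhtml']
toc_item = 'contents.xhtml'
position = 'start'                 # hypothetical; the default remains 'end'

if position == 'end':
    spine.append(toc_item)         # appended; marked non-linear in the real code
else:
    spine.insert(0, toc_item)      # placed first and marked linear

print(spine)                       # ['contents.xhtml', 'chapter1.xhtml', 'chapter2.xhtml']
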
4559=== modified file 'src/calibre/gui2/__init__.py'
4560--- src/calibre/gui2/__init__.py 2011-07-10 00:34:12 +0000
4561+++ src/calibre/gui2/__init__.py 2011-07-19 06:18:03 +0000
4562@@ -15,7 +15,6 @@
4563 from calibre.constants import (islinux, iswindows, isbsd, isfrozen, isosx,
4564 config_dir)
4565 from calibre.utils.config import Config, ConfigProxy, dynamic, JSONConfig
4566-from calibre.utils.localization import set_qt_translator
4567 from calibre.ebooks.metadata import MetaInformation
4568 from calibre.utils.date import UNDEFINED_DATE
4569
4570@@ -631,6 +630,22 @@
4571 nw = min(self.width(), nw)
4572 self.resize(nw, nh)
4573
4574+class Translator(QTranslator):
4575+ '''
4576+ Translator to load translations for strings in Qt from the calibre
4577+ translations. Does not support advanced features of Qt like disambiguation
4578+ and plural forms.
4579+ '''
4580+
4581+ def translate(self, *args, **kwargs):
4582+ try:
4583+ src = unicode(args[1])
4584+ except:
4585+ return u''
4586+ t = _
4587+ return t(src)
4588+
4589+
4590 gui_thread = None
4591
4592 qt_app = None
4593@@ -677,9 +692,8 @@
4594 def load_translations(self):
4595 if self._translator is not None:
4596 self.removeTranslator(self._translator)
4597- self._translator = QTranslator(self)
4598- if set_qt_translator(self._translator):
4599- self.installTranslator(self._translator)
4600+ self._translator = Translator(self)
4601+ self.installTranslator(self._translator)
4602
4603 def event(self, e):
4604 if callable(self.file_event_hook) and e.type() == QEvent.FileOpen:
4605
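The Translator subclass routes every string Qt asks to translate through calibre's own gettext-based catalog instead of a compiled Qt .qm file, at the cost of ignoring Qt's context and plural information. The idea, sketched without the Qt dependency and with a hypothetical catalog:

catalog = {'Close': 'Zatvori'}           # hypothetical loaded translation catalog

def gettext(s):
    # Stand-in for the _ function installed by calibre's localization setup.
    return catalog.get(s, s)

class Translator:                        # stands in for the QTranslator subclass
    def translate(self, context, source, *rest):
        # Qt passes (context, source_text, disambiguation, n); only the source
        # text is used here, so disambiguation and plural forms are lost.
        try:
            return gettext(str(source))
        except Exception:
            return ''

t = Translator()
print(t.translate('QDialogButtonBox', 'Close'))   # Zatvori
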
4606=== modified file 'src/calibre/gui2/actions/convert.py'
4607--- src/calibre/gui2/actions/convert.py 2011-06-02 01:28:36 +0000
4608+++ src/calibre/gui2/actions/convert.py 2011-07-19 06:18:03 +0000
4609@@ -12,7 +12,7 @@
4610
4611 from calibre.gui2 import error_dialog, Dispatcher
4612 from calibre.gui2.tools import convert_single_ebook, convert_bulk_ebook
4613-from calibre.utils.config import prefs
4614+from calibre.utils.config import prefs, tweaks
4615 from calibre.gui2.actions import InterfaceAction
4616 from calibre.customize.ui import plugin_for_input_format
4617
4618@@ -118,6 +118,8 @@
4619 def queue_convert_jobs(self, jobs, changed, bad, rows, previous,
4620 converted_func, extra_job_args=[]):
4621 for func, args, desc, fmt, id, temp_files in jobs:
4622+ func, _, same_fmt = func.partition(':')
4623+ same_fmt = same_fmt == 'same_fmt'
4624 input_file = args[0]
4625 input_fmt = os.path.splitext(input_file)[1]
4626 core_usage = 1
4627@@ -131,6 +133,7 @@
4628 job = self.gui.job_manager.run_job(Dispatcher(converted_func),
4629 func, args=args, description=desc,
4630 core_usage=core_usage)
4631+ job.conversion_of_same_fmt = same_fmt
4632 args = [temp_files, fmt, id]+extra_job_args
4633 self.conversion_jobs[job] = tuple(args)
4634
4635@@ -166,14 +169,18 @@
4636 if job.failed:
4637 self.gui.job_exception(job)
4638 return
4639+ same_fmt = getattr(job, 'conversion_of_same_fmt', False)
4640 fmtf = temp_files[-1].name
4641 if os.stat(fmtf).st_size < 1:
4642 raise Exception(_('Empty output file, '
4643 'probably the conversion process crashed'))
4644
4645+ db = self.gui.current_db
4646+ if same_fmt and tweaks['save_original_format']:
4647+ db.save_original_format(book_id, fmt, notify=False)
4648+
4649 with open(temp_files[-1].name, 'rb') as data:
4650- self.gui.library_view.model().db.add_format(book_id, \
4651- fmt, data, index_is_id=True)
4652+ db.add_format(book_id, fmt, data, index_is_id=True)
4653 self.gui.status_bar.show_message(job.description + \
4654 (' completed'), 2000)
4655 finally:
4656
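The convert action now lets a job function name carry a ':same_fmt' suffix; the suffix is split off, recorded on the job object, and, when the save_original_format tweak is enabled, causes the existing copy of that format to be saved before the converted output replaces it. The tagging itself is plain string splitting, shown here with hypothetical function names:

func = 'gui_convert:same_fmt'        # hypothetical job function tag
func, _sep, same_fmt = func.partition(':')
same_fmt = (same_fmt == 'same_fmt')
print(func, same_fmt)                # gui_convert True

func2 = 'gui_convert'                # no suffix: the flag stays False
func2, _sep, flag = func2.partition(':')
print(func2, flag == 'same_fmt')     # gui_convert False
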
4657=== modified file 'src/calibre/gui2/actions/delete.py'
4658--- src/calibre/gui2/actions/delete.py 2011-06-04 20:00:28 +0000
4659+++ src/calibre/gui2/actions/delete.py 2011-07-19 06:18:03 +0000
4660@@ -81,7 +81,7 @@
4661 class DeleteAction(InterfaceAction):
4662
4663 name = 'Remove Books'
4664- action_spec = (_('Remove books'), 'trash.png', None, _('Del'))
4665+ action_spec = (_('Remove books'), 'trash.png', None, 'Del')
4666 action_type = 'current'
4667
4668 def genesis(self):
4669
4670=== modified file 'src/calibre/gui2/actions/view.py'
4671--- src/calibre/gui2/actions/view.py 2011-05-01 16:33:10 +0000
4672+++ src/calibre/gui2/actions/view.py 2011-07-19 06:18:03 +0000
4673@@ -128,7 +128,8 @@
4674 self.gui.unsetCursor()
4675
4676 def _view_file(self, name):
4677- ext = os.path.splitext(name)[1].upper().replace('.', '')
4678+ ext = os.path.splitext(name)[1].upper().replace('.',
4679+ '').replace('ORIGINAL_', '')
4680 viewer = 'lrfviewer' if ext == 'LRF' else 'ebook-viewer'
4681 internal = ext in config['internally_viewed_formats']
4682 self._launch_viewer(name, viewer, internal)
4683
4684=== modified file 'src/calibre/gui2/convert/look_and_feel.py'
4685--- src/calibre/gui2/convert/look_and_feel.py 2011-01-06 19:46:34 +0000
4686+++ src/calibre/gui2/convert/look_and_feel.py 2011-07-19 06:18:03 +0000
4687@@ -24,7 +24,10 @@
4688 'font_size_mapping', 'line_height', 'minimum_line_height',
4689 'linearize_tables', 'smarten_punctuation',
4690 'disable_font_rescaling', 'insert_blank_line',
4691- 'remove_paragraph_spacing', 'remove_paragraph_spacing_indent_size','input_encoding',
4692+ 'remove_paragraph_spacing',
4693+ 'remove_paragraph_spacing_indent_size',
4694+ 'insert_blank_line_size',
4695+ 'input_encoding',
4696 'asciiize', 'keep_ligatures']
4697 )
4698 for val, text in [
4699
4700=== modified file 'src/calibre/gui2/convert/look_and_feel.ui'
4701--- src/calibre/gui2/convert/look_and_feel.ui 2011-01-25 21:40:18 +0000
4702+++ src/calibre/gui2/convert/look_and_feel.ui 2011-07-19 06:18:03 +0000
4703@@ -6,7 +6,7 @@
4704 <rect>
4705 <x>0</x>
4706 <y>0</y>
4707- <width>600</width>
4708+ <width>642</width>
4709 <height>500</height>
4710 </rect>
4711 </property>
4712@@ -31,7 +31,7 @@
4713 </property>
4714 </widget>
4715 </item>
4716- <item row="1" column="1" colspan="2">
4717+ <item row="1" column="1">
4718 <widget class="QDoubleSpinBox" name="opt_base_font_size">
4719 <property name="suffix">
4720 <string> pt</string>
4721@@ -97,6 +97,29 @@
4722 </item>
4723 </layout>
4724 </item>
4725+ <item row="3" column="0">
4726+ <widget class="QLabel" name="label_6">
4727+ <property name="text">
4728+ <string>Minimum &amp;line height:</string>
4729+ </property>
4730+ <property name="buddy">
4731+ <cstring>opt_minimum_line_height</cstring>
4732+ </property>
4733+ </widget>
4734+ </item>
4735+ <item row="3" column="1">
4736+ <widget class="QDoubleSpinBox" name="opt_minimum_line_height">
4737+ <property name="suffix">
4738+ <string> %</string>
4739+ </property>
4740+ <property name="decimals">
4741+ <number>1</number>
4742+ </property>
4743+ <property name="maximum">
4744+ <double>900.000000000000000</double>
4745+ </property>
4746+ </widget>
4747+ </item>
4748 <item row="4" column="0">
4749 <widget class="QLabel" name="label">
4750 <property name="text">
4751@@ -107,7 +130,7 @@
4752 </property>
4753 </widget>
4754 </item>
4755- <item row="4" column="1" colspan="2">
4756+ <item row="4" column="1">
4757 <widget class="QDoubleSpinBox" name="opt_line_height">
4758 <property name="suffix">
4759 <string> pt</string>
4760@@ -127,6 +150,13 @@
4761 </property>
4762 </widget>
4763 </item>
4764+ <item row="5" column="1" colspan="2">
4765+ <widget class="EncodingComboBox" name="opt_input_encoding">
4766+ <property name="editable">
4767+ <bool>true</bool>
4768+ </property>
4769+ </widget>
4770+ </item>
4771 <item row="6" column="0" colspan="2">
4772 <widget class="QCheckBox" name="opt_remove_paragraph_spacing">
4773 <property name="text">
4774@@ -134,48 +164,58 @@
4775 </property>
4776 </widget>
4777 </item>
4778- <item row="6" column="2" colspan="2">
4779- <layout class="QHBoxLayout" name="horizontalLayout_2">
4780- <item>
4781- <widget class="QLabel" name="label_4">
4782- <property name="text">
4783- <string>Indent size:</string>
4784- </property>
4785- <property name="alignment">
4786- <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
4787- </property>
4788- </widget>
4789- </item>
4790- <item>
4791- <widget class="QDoubleSpinBox" name="opt_remove_paragraph_spacing_indent_size">
4792- <property name="toolTip">
4793- <string>&lt;p&gt;When calibre removes inter paragraph spacing, it automatically sets a paragraph indent, to ensure that paragraphs can be easily distinguished. This option controls the width of that indent.</string>
4794- </property>
4795- <property name="suffix">
4796- <string> em</string>
4797- </property>
4798- <property name="decimals">
4799- <number>1</number>
4800- </property>
4801- </widget>
4802- </item>
4803- </layout>
4804- </item>
4805- <item row="7" column="0">
4806+ <item row="7" column="0" colspan="2">
4807+ <widget class="QCheckBox" name="opt_insert_blank_line">
4808+ <property name="text">
4809+ <string>Insert &amp;blank line between paragraphs</string>
4810+ </property>
4811+ </widget>
4812+ </item>
4813+ <item row="7" column="4">
4814+ <widget class="QDoubleSpinBox" name="opt_insert_blank_line_size">
4815+ <property name="suffix">
4816+ <string> em</string>
4817+ </property>
4818+ <property name="decimals">
4819+ <number>1</number>
4820+ </property>
4821+ </widget>
4822+ </item>
4823+ <item row="8" column="0">
4824 <widget class="QLabel" name="label_5">
4825 <property name="text">
4826- <string>Text justification:</string>
4827+ <string>Text &amp;justification:</string>
4828+ </property>
4829+ <property name="buddy">
4830+ <cstring>opt_change_justification</cstring>
4831 </property>
4832 </widget>
4833 </item>
4834- <item row="8" column="0">
4835+ <item row="8" column="2" colspan="3">
4836+ <widget class="QComboBox" name="opt_change_justification"/>
4837+ </item>
4838+ <item row="9" column="0">
4839 <widget class="QCheckBox" name="opt_linearize_tables">
4840 <property name="text">
4841 <string>&amp;Linearize tables</string>
4842 </property>
4843 </widget>
4844 </item>
4845- <item row="11" column="0" colspan="4">
4846+ <item row="9" column="1" colspan="4">
4847+ <widget class="QCheckBox" name="opt_asciiize">
4848+ <property name="text">
4849+ <string>&amp;Transliterate unicode characters to ASCII</string>
4850+ </property>
4851+ </widget>
4852+ </item>
4853+ <item row="10" column="1" colspan="2">
4854+ <widget class="QCheckBox" name="opt_keep_ligatures">
4855+ <property name="text">
4856+ <string>Keep &amp;ligatures</string>
4857+ </property>
4858+ </widget>
4859+ </item>
4860+ <item row="12" column="0" colspan="5">
4861 <widget class="QGroupBox" name="groupBox">
4862 <property name="title">
4863 <string>Extra &amp;CSS</string>
4864@@ -187,27 +227,16 @@
4865 </layout>
4866 </widget>
4867 </item>
4868- <item row="7" column="2" colspan="2">
4869- <widget class="QComboBox" name="opt_change_justification"/>
4870- </item>
4871- <item row="8" column="1" colspan="3">
4872- <widget class="QCheckBox" name="opt_asciiize">
4873- <property name="text">
4874- <string>&amp;Transliterate unicode characters to ASCII</string>
4875- </property>
4876- </widget>
4877- </item>
4878- <item row="9" column="0">
4879- <widget class="QCheckBox" name="opt_insert_blank_line">
4880- <property name="text">
4881- <string>Insert &amp;blank line</string>
4882- </property>
4883- </widget>
4884- </item>
4885- <item row="9" column="1" colspan="2">
4886- <widget class="QCheckBox" name="opt_keep_ligatures">
4887- <property name="text">
4888- <string>Keep &amp;ligatures</string>
4889+ <item row="6" column="4">
4890+ <widget class="QDoubleSpinBox" name="opt_remove_paragraph_spacing_indent_size">
4891+ <property name="toolTip">
4892+ <string>&lt;p&gt;When calibre removes inter paragraph spacing, it automatically sets a paragraph indent, to ensure that paragraphs can be easily distinguished. This option controls the width of that indent.</string>
4893+ </property>
4894+ <property name="suffix">
4895+ <string> em</string>
4896+ </property>
4897+ <property name="decimals">
4898+ <number>1</number>
4899 </property>
4900 </widget>
4901 </item>
4902@@ -218,33 +247,29 @@
4903 </property>
4904 </widget>
4905 </item>
4906- <item row="3" column="0">
4907- <widget class="QLabel" name="label_6">
4908- <property name="text">
4909- <string>Minimum &amp;line height:</string>
4910- </property>
4911- <property name="buddy">
4912- <cstring>opt_minimum_line_height</cstring>
4913- </property>
4914- </widget>
4915- </item>
4916- <item row="3" column="1" colspan="2">
4917- <widget class="QDoubleSpinBox" name="opt_minimum_line_height">
4918- <property name="suffix">
4919- <string> %</string>
4920- </property>
4921- <property name="decimals">
4922- <number>1</number>
4923- </property>
4924- <property name="maximum">
4925- <double>900.000000000000000</double>
4926- </property>
4927- </widget>
4928- </item>
4929- <item row="5" column="1" colspan="3">
4930- <widget class="EncodingComboBox" name="opt_input_encoding">
4931- <property name="editable">
4932- <bool>true</bool>
4933+ <item row="6" column="3">
4934+ <widget class="QLabel" name="label_4">
4935+ <property name="text">
4936+ <string>&amp;Indent size:</string>
4937+ </property>
4938+ <property name="alignment">
4939+ <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
4940+ </property>
4941+ <property name="buddy">
4942+ <cstring>opt_remove_paragraph_spacing_indent_size</cstring>
4943+ </property>
4944+ </widget>
4945+ </item>
4946+ <item row="7" column="3">
4947+ <widget class="QLabel" name="label_7">
4948+ <property name="text">
4949+ <string>&amp;Line size:</string>
4950+ </property>
4951+ <property name="alignment">
4952+ <set>Qt::AlignRight|Qt::AlignTrailing|Qt::AlignVCenter</set>
4953+ </property>
4954+ <property name="buddy">
4955+ <cstring>opt_insert_blank_line_size</cstring>
4956 </property>
4957 </widget>
4958 </item>
4959
4960=== modified file 'src/calibre/gui2/convert/mobi_output.py'
4961--- src/calibre/gui2/convert/mobi_output.py 2010-12-20 20:20:21 +0000
4962+++ src/calibre/gui2/convert/mobi_output.py 2011-07-19 06:18:03 +0000
4963@@ -24,7 +24,7 @@
4964 def __init__(self, parent, get_option, get_help, db=None, book_id=None):
4965 Widget.__init__(self, parent,
4966 ['prefer_author_sort', 'rescale_images', 'toc_title',
4967- 'mobi_ignore_margins',
4968+ 'mobi_ignore_margins', 'mobi_toc_at_start',
4969 'dont_compress', 'no_inline_toc', 'masthead_font','personal_doc']
4970 )
4971 from calibre.utils.fonts import fontconfig
4972
4973=== modified file 'src/calibre/gui2/convert/mobi_output.ui'
4974--- src/calibre/gui2/convert/mobi_output.ui 2010-12-12 19:09:43 +0000
4975+++ src/calibre/gui2/convert/mobi_output.ui 2011-07-19 06:18:03 +0000
4976@@ -27,21 +27,21 @@
4977 <item row="1" column="1">
4978 <widget class="QLineEdit" name="opt_toc_title"/>
4979 </item>
4980- <item row="2" column="0" colspan="2">
4981+ <item row="4" column="0" colspan="2">
4982 <widget class="QCheckBox" name="opt_rescale_images">
4983 <property name="text">
4984 <string>Rescale images for &amp;Palm devices</string>
4985 </property>
4986 </widget>
4987 </item>
4988- <item row="3" column="0" colspan="2">
4989+ <item row="5" column="0" colspan="2">
4990 <widget class="QCheckBox" name="opt_prefer_author_sort">
4991 <property name="text">
4992 <string>Use author &amp;sort for author</string>
4993 </property>
4994 </widget>
4995 </item>
4996- <item row="4" column="0">
4997+ <item row="6" column="0">
4998 <widget class="QCheckBox" name="opt_dont_compress">
4999 <property name="text">
5000 <string>Disable compression of the file contents</string>
The diff has been truncated for viewing.
