Merge lp:~niluje/calibre/recipes into lp:calibre

Proposed by NiLuJe
Status: Merged
Merged at revision: 13266
Proposed branch: lp:~niluje/calibre/recipes
Merge into: lp:calibre
Diff against target: 99 lines (+62/-6)
2 files modified
recipes/doghousediaries.recipe (+52/-0)
recipes/xkcd.recipe (+10/-6)
To merge this branch: bzr merge lp:~niluje/calibre/recipes
Reviewer | Review Type | Date Requested | Status
Kovid Goyal | | | Pending
Review via email: mp+124463@code.launchpad.net

Description of the change

Part I of the patches alluded to in http://www.mobileread.com/forums/showpost.php?p=2222408&postcount=26

Mostly trivial recipe tweaks.

To post a comment you must log in.
lp:~niluje/calibre/recipes updated
13266. By Kovid Goyal

Update xkcd and add doghousediaries by NiLuJe

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== added file 'recipes/doghousediaries.recipe'
2--- recipes/doghousediaries.recipe 1970-01-01 00:00:00 +0000
3+++ recipes/doghousediaries.recipe 2012-09-14 16:15:25 +0000
4@@ -0,0 +1,52 @@
5+__license__ = 'GPL v3'
6+__copyright__ = '2010-2012, NiLuJe <niluje at ak-team.com>'
7+
8+'''
9+Fetch DoghouseDiaries.
10+'''
11+
12+import re
13+from calibre.web.feeds.news import BasicNewsRecipe
14+
15+class DoghouseDiaries(BasicNewsRecipe):
16+ title = 'Doghouse Diaries'
17+ description = 'A webcomic.'
18+ __author__ = 'NiLuJe'
19+ language = 'en'
20+
21+ use_embedded_content = False
22+ # 14 comics per fetch (not really days... but we can't easily get the date of individual comics, short of parsing each one...)
23+ oldest_article = 14
24+
25+ cover_url = 'http://www.thedoghousediaries.com/logos/logo3.png'
26+ masthead_url = 'http://www.thedoghousediaries.com/logos/logo3.png'
27+
28+ keep_only_tags = [dict(name='img', attrs={'class': re.compile("comic-item*")}), dict(name='h1'), dict(name='div', attrs={'class':'entry'}), dict(name='p', id='alttext')]
29+ remove_tags = [dict(name='div', attrs={'class':'pin-it-btn-wrapper'}), dict(name='span'), dict(name='div', id='wp_fb_like_button')]
30+ remove_attributes = ['width', 'height']
31+ no_stylesheets = True
32+
33+ # Turn image bubblehelp into a paragraph (NOTE: We run before the remove_tags cleanup, so we need to make sure we only parse the comic-item img, not the pinterest one pulled by the entry div)
34+ preprocess_regexps = [
35+ (re.compile(r'(<img.*src="http://thedoghousediaries.com/comics/.*title=")([^"]+)(".*>)'),
36+ lambda m: '%s%s<p id="alttext"><strong>%s</strong></p>' % (m.group(1), m.group(3), m.group(2)))
37+ ]
38+
39+ def parse_index(self):
40+ INDEX = 'http://www.thedoghousediaries.com/'
41+
42+ soup = self.index_to_soup(INDEX)
43+ articles = []
44+ # Since the feed sucks, and there's no real archive, we use the 'Quick Archive' thingie, but we can't get the date from here, so stop after 14 comics...
45+ for item in soup.findAll('option', {}, True, None, self.oldest_article+1):
46+ # Skip the quick archive itself
47+ if ( item['value'] != '0' ):
48+ articles.append({
49+ 'title': self.tag_to_string(item).encode('UTF-8'),
50+ 'url': item['value'],
51+ 'description': '',
52+ 'content': '',
53+ })
54+
55+ return [('Doghouse Diaries', articles)]
56+
57
58=== modified file 'recipes/xkcd.recipe'
59--- recipes/xkcd.recipe 2012-04-19 03:36:38 +0000
60+++ recipes/xkcd.recipe 2012-09-14 16:15:25 +0000
61@@ -2,6 +2,8 @@
62 __copyright__ = '2008, Kovid Goyal <kovid at kovidgoyal.net>'
63 '''
64 Changelog:
65+2012-04-06
66+Fixed empty articles, added masthead img (NiLuJe)
67 2011-09-24
68 Changed cover (drMerry)
69 '''
70@@ -13,7 +15,8 @@
71 from calibre.web.feeds.news import BasicNewsRecipe
72
73 class XkcdCom(BasicNewsRecipe):
74- cover_url = 'http://imgs.xkcd.com/s/9be30a7.png'
75+ cover_url = 'http://imgs.xkcd.com/static/terrible_small_logo.png'
76+ masthead_url = 'http://imgs.xkcd.com/static/terrible_small_logo.png'
77 title = 'xkcd'
78 description = 'A webcomic of romance and math humor.'
79 __author__ = 'Martin Pitt updated by DrMerry.'
80@@ -21,13 +24,14 @@
81
82 use_embedded_content = False
83 oldest_article = 60
84- keep_only_tags = [dict(id='middleContainer')]
85- remove_tags = [dict(name='ul'), dict(name='h3'), dict(name='br')]
86+ #keep_only_tags = [dict(id='middleContainer')]
87+ #remove_tags = [dict(name='ul'), dict(name='h3'), dict(name='br')]
88+ keep_only_tags = [dict(id='comic')]
89 no_stylesheets = True
90- # turn image bubblehelp into a paragraph
91+ # turn image bubblehelp into a paragraph, and put alt in a heading
92 preprocess_regexps = [
93- (re.compile(r'(<img.*title=")([^"]+)(".*>)'),
94- lambda m: '%s%s<p>%s</p>' % (m.group(1), m.group(3), m.group(2)))
95+ (re.compile(r'(<img.*title=")([^"]+)(".alt=")([^"]+)(".*>)'),
96+ lambda m: '<h1>%s</h1>%s%s%s<p>%s</p>' % (m.group(4), m.group(1), m.group(3), m.group(5), m.group(2)))
97 ]
98
99 def parse_index(self):

Subscribers

People subscribed via source and target branches