Merge lp:~miurahr/calibre/recipes into lp:calibre

Proposed by Hiroshi Miura
Status: Merged
Merged at revision: 7185
Proposed branch: lp:~miurahr/calibre/recipes
Merge into: lp:calibre
Diff against target: 358 lines (+306/-2)
9 files modified
resources/recipes/ajiajin.recipe (+24/-0)
resources/recipes/chouchoublog.recipe (+37/-0)
resources/recipes/kahokushinpo.recipe (+32/-0)
resources/recipes/nationalgeographic.recipe (+38/-0)
resources/recipes/nationalgeographicjp.recipe (+20/-0)
resources/recipes/nikkei_sub_shakai.recipe (+2/-2)
resources/recipes/paperli.recipe (+58/-0)
resources/recipes/paperli_topic.recipe (+59/-0)
resources/recipes/uninohimitu.recipe (+36/-0)
To merge this branch: bzr merge lp:~miurahr/calibre/recipes
Reviewer: Kovid Goyal (review: Pending)
Review via email: mp+43470@code.launchpad.net

Description of the change

Add more recipes:

    - experimental paper.li recipes (en)
      * Wikileaks
      * #osm
    - Ajiajin, tech news from Asia and Japan (en)
    - National Geographic News (en)
    - National Geographic Japanese news (ja)
    - Kahoku Shinpo, a Tohoku regional newspaper (ja)
    - blogs: a cute cat photo blog and a dog photo blog (ja)

Fix some recipes:

    - Nikkei social section (class name and title typo)


Preview Diff

=== added file 'resources/recipes/ajiajin.recipe'
--- resources/recipes/ajiajin.recipe 1970-01-01 00:00:00 +0000
+++ resources/recipes/ajiajin.recipe 2010-12-12 13:51:27 +0000
@@ -0,0 +1,24 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
+'''
+ajiajin.com/blog
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AjiajinBlog(BasicNewsRecipe):
+    title = u'Ajiajin blog'
+    __author__ = 'Hiroshi Miura'
+    oldest_article = 5
+    publication_type = 'blog'
+    max_articles_per_feed = 100
+    description = 'The next generation internet trends in Japan and Asia'
+    publisher = ''
+    category = 'internet, asia, japan'
+    language = 'en'
+    encoding = 'utf-8'
+
+    feeds = [(u'blog', u'http://feeds.feedburner.com/Asiajin')]
+
+

=== added file 'resources/recipes/chouchoublog.recipe'
--- resources/recipes/chouchoublog.recipe 1970-01-01 00:00:00 +0000
+++ resources/recipes/chouchoublog.recipe 2010-12-12 13:51:27 +0000
@@ -0,0 +1,37 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
+'''
+http://ameblo.jp/
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class SakuraBlog(BasicNewsRecipe):
+    title = u'chou chou blog'
+    __author__ = 'Hiroshi Miura'
+    oldest_article = 4
+    publication_type = 'blog'
+    max_articles_per_feed = 20
+    description = 'Japanese popular dog blog'
+    publisher = ''
+    category = 'dog, pet, japan'
+    language = 'ja'
+    encoding = 'utf-8'
+    use_embedded_content = True
+
+    feeds = [(u'blog', u'http://feedblog.ameba.jp/rss/ameblo/chouchou1218/rss20.xml')]
+
+    def parse_feeds(self):
+        feeds = BasicNewsRecipe.parse_feeds(self)
+        for curfeed in feeds:
+            delList = []
+            for a,curarticle in enumerate(curfeed.articles):
+                if re.search(r'rssad\.jp', curarticle.url):
+                    delList.append(curarticle)
+            if len(delList)>0:
+                for d in delList:
+                    index = curfeed.articles.index(d)
+                    curfeed.articles[index:index+1] = []
+        return feeds
+

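The parse_feeds override above drops feed entries that point at the rssad.jp ad redirector before anything is downloaded; the same filtering pattern recurs in nationalgeographic.recipe (against ads.pheedo.com) and uninohimitu.recipe below. A more compact sketch of the idea, assuming only the BasicNewsRecipe API the recipes already use (the class name and title are illustrative; the feed URL is the one above):

    import re
    from calibre.web.feeds.news import BasicNewsRecipe

    class AdFilteringExample(BasicNewsRecipe):
        # Hypothetical recipe name, for illustration only.
        title = u'Ad filtering example'
        feeds = [(u'blog', u'http://feedblog.ameba.jp/rss/ameblo/chouchou1218/rss20.xml')]

        def parse_feeds(self):
            feeds = BasicNewsRecipe.parse_feeds(self)
            for curfeed in feeds:
                # Rebuild each article list in place, keeping only non-ad entries.
                curfeed.articles[:] = [a for a in curfeed.articles
                                       if not re.search(r'rssad\.jp', a.url)]
            return feeds
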
=== added file 'resources/recipes/kahokushinpo.recipe'
--- resources/recipes/kahokushinpo.recipe 1970-01-01 00:00:00 +0000
+++ resources/recipes/kahokushinpo.recipe 2010-12-12 13:51:27 +0000
@@ -0,0 +1,32 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
+'''
+www.kahoku.co.jp
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class KahokuShinpoNews(BasicNewsRecipe):
+    title = u'\u6cb3\u5317\u65b0\u5831'
+    __author__ = 'Hiroshi Miura'
+    oldest_article = 2
+    max_articles_per_feed = 20
+    description = 'Tohoku regional newspaper in Japan'
+    publisher = 'Kahoku Shinpo Sha'
+    category = 'news, japan'
+    language = 'ja'
+    encoding = 'Shift_JIS'
+    no_stylesheets = True
+
+    feeds = [(u'news', u'http://www.kahoku.co.jp/rss/index_thk.xml')]
+
+    keep_only_tags = [ dict(id="page_title"),
+                       dict(id="news_detail"),
+                       dict(id="bt_title"),
+                       {'class':"photoLeft"},
+                       dict(id="bt_body")
+                     ]
+    remove_tags = [ {'class':"button"}]
+

=== added file 'resources/recipes/nationalgeographic.recipe'
--- resources/recipes/nationalgeographic.recipe 1970-01-01 00:00:00 +0000
+++ resources/recipes/nationalgeographic.recipe 2010-12-12 13:51:27 +0000
@@ -0,0 +1,38 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
+'''
+nationalgeographic.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+class NationalGeographicNews(BasicNewsRecipe):
+    title = u'National Geographic News'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    remove_javascript = True
+    no_stylesheets = True
+    use_embedded_content = False
+
+    feeds = [(u'news', u'http://feeds.nationalgeographic.com/ng/News/News_Main')]
+
+    remove_tags_before = dict(id='page_head')
+    remove_tags_after = [dict(id='social_buttons'),{'class':'aside'}]
+    remove_tags = [
+        {'class':'hidden'}
+
+    ]
+
+    def parse_feeds(self):
+        feeds = BasicNewsRecipe.parse_feeds(self)
+        for curfeed in feeds:
+            delList = []
+            for a,curarticle in enumerate(curfeed.articles):
+                if re.search(r'ads\.pheedo\.com', curarticle.url):
+                    delList.append(curarticle)
+            if len(delList)>0:
+                for d in delList:
+                    index = curfeed.articles.index(d)
+                    curfeed.articles[index:index+1] = []
+        return feeds

=== added file 'resources/recipes/nationalgeographicjp.recipe'
--- resources/recipes/nationalgeographicjp.recipe 1970-01-01 00:00:00 +0000
+++ resources/recipes/nationalgeographicjp.recipe 2010-12-12 13:51:27 +0000
@@ -0,0 +1,20 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
+'''
+nationalgeographic.co.jp
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+class NationalGeoJp(BasicNewsRecipe):
+    title = u'\u30ca\u30b7\u30e7\u30ca\u30eb\u30fb\u30b8\u30aa\u30b0\u30e9\u30d5\u30a3\u30c3\u30af\u30cb\u30e5\u30fc\u30b9'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    no_stylesheets = True
+
+    feeds = [(u'news', u'http://www.nationalgeographic.co.jp/news/rss.php')]
+
+    def print_version(self, url):
+        return re.sub(r'news_article\.php', 'news_printer_friendly.php', url)
+

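print_version above maps each article URL onto the site's printer-friendly page by swapping the script name, so calibre fetches the clean single-page version of each story. For example (the file_id query string is a hypothetical illustration, not taken from the site):

    >>> import re
    >>> url = 'http://www.nationalgeographic.co.jp/news/news_article.php?file_id=123'
    >>> re.sub(r'news_article\.php', 'news_printer_friendly.php', url)
    'http://www.nationalgeographic.co.jp/news/news_printer_friendly.php?file_id=123'
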
=== modified file 'resources/recipes/nikkei_sub_shakai.recipe'
--- resources/recipes/nikkei_sub_shakai.recipe 2010-12-02 15:46:25 +0000
+++ resources/recipes/nikkei_sub_shakai.recipe 2010-12-12 13:51:27 +0000
@@ -10,8 +10,8 @@
 from calibre.ptempfile import PersistentTemporaryFile
 
 
-class NikkeiNet_sub_life(BasicNewsRecipe):
-    title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u751f\u6d3b)'
+class NikkeiNet_sub_shakai(BasicNewsRecipe):
+    title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(Social)'
     __author__ = 'Hiroshi Miura'
     description = 'News and current market affairs from Japan'
     cover_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'

=== added file 'resources/recipes/paperli.recipe'
--- resources/recipes/paperli.recipe 1970-01-01 00:00:00 +0000
+++ resources/recipes/paperli.recipe 2010-12-12 13:51:27 +0000
@@ -0,0 +1,58 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
+'''
+paperli
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre import strftime
+import re, sys
+
+class paperli(BasicNewsRecipe):
+#-------------------please change here ----------------
+    paperli_tag = 'osm'
+    title = u'The # osm Daily - paperli'
+#-------------------------------------------------------------
+    base_url = 'http://paper.li'
+    index = '/tag/'+paperli_tag+'/~list'
+
+    __author__ = 'Hiroshi Miura'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    description = 'paper.li page'
+    publisher = 'paper.li'
+    category = 'paper.li'
+    language = 'en'
+    encoding = 'utf-8'
+    remove_javascript = True
+    timefmt = '[%y/%m/%d]'
+
+    def parse_index(self):
+        feeds = []
+        newsarticles = []
+        topic = 'HEADLINE'
+
+        # walk all pages of the tag's '~list' view
+        page = self.index
+        while True:
+            soup = self.index_to_soup(''.join([self.base_url, page]))
+            for itt in soup.findAll('div', attrs={'class':'yui-u'}):
+                itema = itt.find('a', href=True, attrs={'class':'ts'})
+                if itema is not None:
+                    itemd = itt.find('div', text=True, attrs={'class':'text'})
+                    newsarticles.append({
+                         'title'      :itema.string
+                        ,'date'       :strftime(self.timefmt)
+                        ,'url'        :itema['href']
+                        ,'description':itemd.string
+                        })
+
+            nextpage = soup.find('div', attrs={'class':'pagination_top'}).find('li', attrs={'class':'next'})
+            if nextpage is not None:
+                page = nextpage.find('a', href=True)['href']
+            else:
+                break
+
+        feeds.append((topic, newsarticles))
+        return feeds
+

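parse_index above paginates through paper.li's '~list' view: it collects every div.yui-u story block on a page, then follows the li.next link inside div.pagination_top until no next page is left. Read in isolation, the pagination loop amounts to the following sketch (iter_pages is a hypothetical helper, not part of the recipe; it assumes the same markup and the index_to_soup helper from BasicNewsRecipe):

    def iter_pages(recipe, base_url, first_page):
        # Yield one parsed page at a time, following paper.li's 'next'
        # link until it disappears.
        page = first_page
        while True:
            soup = recipe.index_to_soup(base_url + page)
            yield soup
            nav = soup.find('div', attrs={'class':'pagination_top'})
            nextpage = nav.find('li', attrs={'class':'next'}) if nav else None
            if nextpage is None:
                break
            page = nextpage.find('a', href=True)['href']

Unlike the recipe, the sketch tolerates a page with no pagination bar at all; the recipe as written would raise an AttributeError there.
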
=== added file 'resources/recipes/paperli_topic.recipe'
--- resources/recipes/paperli_topic.recipe 1970-01-01 00:00:00 +0000
+++ resources/recipes/paperli_topic.recipe 2010-12-12 13:51:27 +0000
@@ -0,0 +1,59 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
+'''
+paperli
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre import strftime
+import re
+
+class paperli_topics(BasicNewsRecipe):
+#-------------------please change here ----------------
+    paperli_tag = 'wikileaks'
+    title = u'The # wikileaks Daily - paperli'
+#-------------------------------------------------------------
+    __author__ = 'Hiroshi Miura'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    description = 'paper.li page about '+ paperli_tag
+    publisher = 'paper.li'
+    category = 'paper.li'
+    language = 'en'
+    encoding = 'utf-8'
+    remove_javascript = True
+    masthead_title = u'The '+ paperli_tag +' Daily'
+    timefmt = '[%y/%m/%d]'
+    base_url = 'http://paper.li'
+    index = base_url+'/tag/'+paperli_tag
+
+
+    def parse_index(self):
+
+        # get topics
+        topics = []
+        soup = self.index_to_soup(self.index)
+        topics_lists = soup.find('div', attrs={'class':'paper-nav-bottom'})
+        for item in topics_lists.findAll('li', attrs={'class':""}):
+            itema = item.find('a', href=True)
+            topics.append({'title': itema.string, 'url': itema['href']})
+
+        # get feeds
+        feeds = []
+        for topic in topics:
+            newsarticles = []
+            soup = self.index_to_soup(''.join([self.base_url, topic['url'] ]))
+            topstories = soup.findAll('div', attrs={'class':'yui-u'})
+            for itt in topstories:
+                itema = itt.find('a', href=True, attrs={'class':'ts'})
+                if itema is not None:
+                    itemd = itt.find('div', text=True, attrs={'class':'text'})
+                    newsarticles.append({
+                         'title'      :itema.string
+                        ,'date'       :strftime(self.timefmt)
+                        ,'url'        :itema['href']
+                        ,'description':itemd.string
+                        })
+            feeds.append((topic['title'], newsarticles))
+        return feeds
+

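Both paper.li recipes return the structure calibre expects from parse_index: a list of (section title, article list) pairs, each article being a dict with title, url, date, and description keys. Schematically, with hypothetical values:

    feeds = [
        (u'Technology', [                    # one (title, articles) pair per topic
            {'title'      : u'Some headline',            # <a class="ts"> text
             'date'       : '[10/12/12]',                # strftime(self.timefmt)
             'url'        : 'http://example.com/story',  # hypothetical URL
             'description': u'Teaser text'},             # <div class="text"> text
        ]),
    ]
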
=== added file 'resources/recipes/uninohimitu.recipe'
--- resources/recipes/uninohimitu.recipe 1970-01-01 00:00:00 +0000
+++ resources/recipes/uninohimitu.recipe 2010-12-12 13:51:27 +0000
@@ -0,0 +1,36 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
+'''
+http://ameblo.jp/sauta19/
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class UniNoHimituKichiBlog(BasicNewsRecipe):
+    title = u'Uni secret base'
+    __author__ = 'Hiroshi Miura'
+    oldest_article = 2
+    publication_type = 'blog'
+    max_articles_per_feed = 20
+    description = 'Japanese famous cat blog'
+    publisher = ''
+    category = 'cat, pet, japan'
+    language = 'ja'
+    encoding = 'utf-8'
+
+    feeds = [(u'blog', u'http://feedblog.ameba.jp/rss/ameblo/sauta19/rss20.xml')]
+
+    def parse_feeds(self):
+        feeds = BasicNewsRecipe.parse_feeds(self)
+        for curfeed in feeds:
+            delList = []
+            for a,curarticle in enumerate(curfeed.articles):
+                if re.search(r'rssad\.jp', curarticle.url):
+                    delList.append(curarticle)
+            if len(delList)>0:
+                for d in delList:
+                    index = curfeed.articles.index(d)
+                    curfeed.articles[index:index+1] = []
+        return feeds
+
