Merge lp:~miurahr/calibre/recipes into lp:calibre

Proposed by Hiroshi Miura
Status: Merged
Merged at revision: 7162
Proposed branch: lp:~miurahr/calibre/recipes
Merge into: lp:calibre
Diff against target: 122 lines (+78/-2)
3 files modified
resources/recipes/mainichi_it_news.recipe (+3/-1)
resources/recipes/the_h.recipe (+7/-1)
resources/recipes/toyokeizai.recipe (+68/-0)
To merge this branch: bzr merge lp:~miurahr/calibre/recipes
Reviewer Review Type Date Requested Status
Kovid Goyal Pending
Review via email: mp+43092@code.launchpad.net

Description of the change

maintenance of recipes

  - Fix several minor bugs in Japanese recipes
  - Add Toyokeizai, an East Asian economics magazine

To post a comment you must log in.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'resources/recipes/mainichi_it_news.recipe'
2--- resources/recipes/mainichi_it_news.recipe 2010-12-02 15:43:59 +0000
3+++ resources/recipes/mainichi_it_news.recipe 2010-12-08 15:06:26 +0000
4@@ -1,4 +1,5 @@
5 from calibre.web.feeds.news import BasicNewsRecipe
6+import re
7
8 class MainichiDailyITNews(BasicNewsRecipe):
9 title = u'\u6bce\u65e5\u65b0\u805e(IT&\u5bb6\u96fb)'
10@@ -14,6 +15,7 @@
11
12 remove_tags_before = {'class':"NewsTitle"}
13 remove_tags = [{'class':"RelatedArticle"}]
14+ remove_tags_after = {'class':"Credit"}
15
16 def parse_feeds(self):
17
18@@ -29,4 +31,4 @@
19 index = curfeed.articles.index(d)
20 curfeed.articles[index:index+1] = []
21
22- return feeds remove_tags_after = {'class':"Credit"}
23+ return feeds
24
25=== modified file 'resources/recipes/the_h.recipe'
26--- resources/recipes/the_h.recipe 2010-11-23 16:04:13 +0000
27+++ resources/recipes/the_h.recipe 2010-12-08 15:06:26 +0000
28@@ -14,7 +14,7 @@
29 oldest_article = 3
30 description = 'In association with Heise Online'
31 publisher = 'Heise Media UK Ltd.'
32- category = 'news, technology, security'
33+ category = 'news, technology, security, OSS, internet'
34 max_articles_per_feed = 100
35 language = 'en'
36 encoding = 'utf-8'
37@@ -27,6 +27,12 @@
38 feeds = [
39 (u'The H News Feed', u'http://www.h-online.com/news/atom.xml')
40 ]
41+ cover_url = 'http://www.h-online.com/icons/logo_theH.gif'
42+
43+ remove_tags = [
44+ dict(id="logo"),
45+ dict(id="footer")
46+ ]
47
48 def print_version(self, url):
49 return url + '?view=print'
50
51=== added file 'resources/recipes/toyokeizai.recipe'
52--- resources/recipes/toyokeizai.recipe 1970-01-01 00:00:00 +0000
53+++ resources/recipes/toyokeizai.recipe 2010-12-08 15:06:26 +0000
54@@ -0,0 +1,68 @@
55+__license__ = 'GPL v3'
56+__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
57+'''
58+www.toyokeizai.net
59+'''
60+
61+from calibre.web.feeds.news import BasicNewsRecipe
62+import re
63+
64+class Toyokeizai(BasicNewsRecipe):
65+ title = u'ToyoKeizai News'
66+ __author__ = 'Hiroshi Miura'
67+ oldest_article = 1
68+ max_articles_per_feed = 50
69+ description = 'Japanese traditional economy and business magazine, only for advanced subscribers supported'
70+ publisher = 'Toyokeizai Shinbun Sha'
71+ category = 'economy, magazine, japan'
72+ language = 'ja'
73+ encoding = 'euc-jp'
74+ index = 'http://member.toyokeizai.net/news/'
75+ remove_javascript = True
76+ no_stylesheets = True
77+ masthead_title = u'TOYOKEIZAI'
78+ needs_subscription = True
79+ timefmt = '[%y/%m/%d]'
80+ recursions = 5
81+ match_regexps =[ r'page/\d+']
82+
83+ keep_only_tags = [
84+ dict(name='div', attrs={'class':['news']}),
85+ dict(name='div', attrs={'class':["news_cont"]}),
86+ dict(name='div', attrs={'class':["news_con"]}),
87+# dict(name='div', attrs={'class':["norightsMessage"]})
88+ ]
89+ remove_tags = [{'class':"mt35 mgz"},
90+ {'class':"mt20 newzia"},
91+ {'class':"mt20 fontS"},
92+ {'class':"bk_btn_m"},
93+ dict(id='newzia_connect_member')
94+ ]
95+
96+ def parse_index(self):
97+ feeds = []
98+ soup = self.index_to_soup(self.index)
99+ topstories = soup.find('ul',attrs={'class':'list6'})
100+ if topstories:
101+ newsarticles = []
102+ for itt in topstories.findAll('li'):
103+ itema = itt.find('a',href=True)
104+ itemd = itt.find('span')
105+ newsarticles.append({
106+ 'title' :itema.string
107+ ,'date' :re.compile(r"\- ").sub("",itemd.string)
108+ ,'url' :'http://member.toyokeizai.net' + itema['href']
109+ ,'description':itema['title']
110+ })
111+ feeds.append(('news', newsarticles))
112+ return feeds
113+
114+ def get_browser(self):
115+ br = BasicNewsRecipe.get_browser()
116+ if self.username is not None and self.password is not None:
117+ br.open('http://member.toyokeizai.net/norights/form/')
118+ br.select_form(nr=0)
119+ br['kaiin_id'] = self.username
120+ br['password'] = self.password
121+ res = br.submit()
122+ return br

Subscribers

People subscribed via source and target branches