Merge lp:~miurahr/calibre/recipes into lp:calibre

Proposed by Hiroshi Miura
Status: Merged
Merged at revision: 7185
Proposed branch: lp:~miurahr/calibre/recipes
Merge into: lp:calibre
Diff against target: 358 lines (+306/-2)
9 files modified
resources/recipes/ajiajin.recipe (+24/-0)
resources/recipes/chouchoublog.recipe (+37/-0)
resources/recipes/kahokushinpo.recipe (+32/-0)
resources/recipes/nationalgeographic.recipe (+38/-0)
resources/recipes/nationalgeographicjp.recipe (+20/-0)
resources/recipes/nikkei_sub_shakai.recipe (+2/-2)
resources/recipes/paperli.recipe (+58/-0)
resources/recipes/paperli_topic.recipe (+59/-0)
resources/recipes/uninohimitu.recipe (+36/-0)
To merge this branch: bzr merge lp:~miurahr/calibre/recipes
Reviewer: Kovid Goyal (review: Pending)
Review via email: mp+43470@code.launchpad.net

Description of the change

Add more recipes:

    - experimental paper.li recipes (en)
      * Wikileaks
      * #osm
    - Ajiajin, tech news from Asia and Japan (en)
    - National Geographic News (en)
    - National Geographic Japanese news (ja)
    - Kahoku Shinpo, a Tohoku regional newspaper (ja)
    - blogs: a cute cat photo blog and a dog photo blog (ja)

Fix some recipes:

    - Nikkei social section (class name and title typo)


Preview Diff

=== added file 'resources/recipes/ajiajin.recipe'
--- resources/recipes/ajiajin.recipe 1970-01-01 00:00:00 +0000
+++ resources/recipes/ajiajin.recipe 2010-12-12 13:51:27 +0000
@@ -0,0 +1,24 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
+'''
+ajiajin.com/blog
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class AjiajinBlog(BasicNewsRecipe):
+    title = u'Ajiajin blog'
+    __author__ = 'Hiroshi Miura'
+    oldest_article = 5
+    publication_type = 'blog'
+    max_articles_per_feed = 100
+    description = 'The next generation internet trends in Japan and Asia'
+    publisher = ''
+    category = 'internet, asia, japan'
+    language = 'en'
+    encoding = 'utf-8'
+
+    feeds = [(u'blog', u'http://feeds.feedburner.com/Asiajin')]
+
+

=== added file 'resources/recipes/chouchoublog.recipe'
--- resources/recipes/chouchoublog.recipe 1970-01-01 00:00:00 +0000
+++ resources/recipes/chouchoublog.recipe 2010-12-12 13:51:27 +0000
@@ -0,0 +1,37 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
+'''
+http://ameblo.jp/
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class SakuraBlog(BasicNewsRecipe):
+    title = u'chou chou blog'
+    __author__ = 'Hiroshi Miura'
+    oldest_article = 4
+    publication_type = 'blog'
+    max_articles_per_feed = 20
+    description = 'Japanese popular dog blog'
+    publisher = ''
+    category = 'dog, pet, japan'
+    language = 'ja'
+    encoding = 'utf-8'
+    use_embedded_content = True
+
+    feeds = [(u'blog', u'http://feedblog.ameba.jp/rss/ameblo/chouchou1218/rss20.xml')]
+
+    def parse_feeds(self):
+        feeds = BasicNewsRecipe.parse_feeds(self)
+        for curfeed in feeds:
+            delList = []
+            for a,curarticle in enumerate(curfeed.articles):
+                if re.search(r'rssad\.jp', curarticle.url):
+                    delList.append(curarticle)
+            if len(delList)>0:
+                for d in delList:
+                    index = curfeed.articles.index(d)
+                    curfeed.articles[index:index+1] = []
+        return feeds
+

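The parse_feeds override above drops feed entries that point at the rssad.jp ad redirector before anything is downloaded; the same filtering pattern recurs in nationalgeographic.recipe (against ads.pheedo.com) and uninohimitu.recipe below. A more compact sketch of the idea, assuming only the BasicNewsRecipe API the recipes already use (the class name and title are illustrative; the feed URL is the one above):

    import re
    from calibre.web.feeds.news import BasicNewsRecipe

    class AdFilteringExample(BasicNewsRecipe):
        # Hypothetical recipe name, for illustration only.
        title = u'Ad filtering example'
        feeds = [(u'blog', u'http://feedblog.ameba.jp/rss/ameblo/chouchou1218/rss20.xml')]

        def parse_feeds(self):
            feeds = BasicNewsRecipe.parse_feeds(self)
            for curfeed in feeds:
                # Rebuild each article list in place, keeping only non-ad entries.
                curfeed.articles[:] = [a for a in curfeed.articles
                                       if not re.search(r'rssad\.jp', a.url)]
            return feeds
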
=== added file 'resources/recipes/kahokushinpo.recipe'
--- resources/recipes/kahokushinpo.recipe 1970-01-01 00:00:00 +0000
+++ resources/recipes/kahokushinpo.recipe 2010-12-12 13:51:27 +0000
@@ -0,0 +1,32 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
+'''
+www.kahoku.co.jp
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+
+class KahokuShinpoNews(BasicNewsRecipe):
+    title = u'\u6cb3\u5317\u65b0\u5831'
+    __author__ = 'Hiroshi Miura'
+    oldest_article = 2
+    max_articles_per_feed = 20
+    description = 'Tohoku regional newspaper in Japan'
+    publisher = 'Kahoku Shinpo Sha'
+    category = 'news, japan'
+    language = 'ja'
+    encoding = 'Shift_JIS'
+    no_stylesheets = True
+
+    feeds = [(u'news', u'http://www.kahoku.co.jp/rss/index_thk.xml')]
+
+    keep_only_tags = [ dict(id="page_title"),
+                       dict(id="news_detail"),
+                       dict(id="bt_title"),
+                       {'class':"photoLeft"},
+                       dict(id="bt_body")
+                     ]
+    remove_tags = [ {'class':"button"}]
+

=== added file 'resources/recipes/nationalgeographic.recipe'
--- resources/recipes/nationalgeographic.recipe 1970-01-01 00:00:00 +0000
+++ resources/recipes/nationalgeographic.recipe 2010-12-12 13:51:27 +0000
@@ -0,0 +1,38 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
+'''
+nationalgeographic.com
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+class NationalGeographicNews(BasicNewsRecipe):
+    title = u'National Geographic News'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    remove_javascript = True
+    no_stylesheets = True
+    use_embedded_content = False
+
+    feeds = [(u'news', u'http://feeds.nationalgeographic.com/ng/News/News_Main')]
+
+    remove_tags_before = dict(id='page_head')
+    remove_tags_after = [dict(id='social_buttons'),{'class':'aside'}]
+    remove_tags = [
+        {'class':'hidden'}
+
+    ]
+
+    def parse_feeds(self):
+        feeds = BasicNewsRecipe.parse_feeds(self)
+        for curfeed in feeds:
+            delList = []
+            for a,curarticle in enumerate(curfeed.articles):
+                if re.search(r'ads\.pheedo\.com', curarticle.url):
+                    delList.append(curarticle)
+            if len(delList)>0:
+                for d in delList:
+                    index = curfeed.articles.index(d)
+                    curfeed.articles[index:index+1] = []
+        return feeds

=== added file 'resources/recipes/nationalgeographicjp.recipe'
--- resources/recipes/nationalgeographicjp.recipe 1970-01-01 00:00:00 +0000
+++ resources/recipes/nationalgeographicjp.recipe 2010-12-12 13:51:27 +0000
@@ -0,0 +1,20 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
+'''
+nationalgeographic.co.jp
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+import re
+
+class NationalGeoJp(BasicNewsRecipe):
+    title = u'\u30ca\u30b7\u30e7\u30ca\u30eb\u30fb\u30b8\u30aa\u30b0\u30e9\u30d5\u30a3\u30c3\u30af\u30cb\u30e5\u30fc\u30b9'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    no_stylesheets = True
+
+    feeds = [(u'news', u'http://www.nationalgeographic.co.jp/news/rss.php')]
+
+    def print_version(self, url):
+        return re.sub(r'news_article\.php', 'news_printer_friendly.php', url)
+

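print_version above maps each article URL onto the site's printer-friendly page by swapping the script name, so calibre fetches the clean single-page version of each story. For example (the file_id query string is a hypothetical illustration, not taken from the site):

    >>> import re
    >>> url = 'http://www.nationalgeographic.co.jp/news/news_article.php?file_id=123'
    >>> re.sub(r'news_article\.php', 'news_printer_friendly.php', url)
    'http://www.nationalgeographic.co.jp/news/news_printer_friendly.php?file_id=123'
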
=== modified file 'resources/recipes/nikkei_sub_shakai.recipe'
--- resources/recipes/nikkei_sub_shakai.recipe 2010-12-02 15:46:25 +0000
+++ resources/recipes/nikkei_sub_shakai.recipe 2010-12-12 13:51:27 +0000
@@ -10,8 +10,8 @@
 from calibre.ptempfile import PersistentTemporaryFile
 
 
-class NikkeiNet_sub_life(BasicNewsRecipe):
-    title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(\u751f\u6d3b)'
+class NikkeiNet_sub_shakai(BasicNewsRecipe):
+    title = u'\u65e5\u7d4c\u65b0\u805e\u96fb\u5b50\u7248(Social)'
     __author__ = 'Hiroshi Miura'
     description = 'News and current market affairs from Japan'
     cover_url = 'http://parts.nikkei.com/parts/ds/images/common/logo_r1.svg'

=== added file 'resources/recipes/paperli.recipe'
--- resources/recipes/paperli.recipe 1970-01-01 00:00:00 +0000
+++ resources/recipes/paperli.recipe 2010-12-12 13:51:27 +0000
@@ -0,0 +1,58 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
+'''
+paperli
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre import strftime
+import re, sys
+
+class paperli(BasicNewsRecipe):
+#-------------------please change here ----------------
+    paperli_tag = 'osm'
+    title = u'The # osm Daily - paperli'
+#-------------------------------------------------------------
+    base_url = 'http://paper.li'
+    index = '/tag/'+paperli_tag+'/~list'
+
+    __author__ = 'Hiroshi Miura'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    description = 'paper.li page'
+    publisher = 'paper.li'
+    category = 'paper.li'
+    language = 'en'
+    encoding = 'utf-8'
+    remove_javascript = True
+    timefmt = '[%y/%m/%d]'
+
+    def parse_index(self):
+        feeds = []
+        newsarticles = []
+        topic = 'HEADLINE'
+
+        # walk all pages of the tag's '~list' view
+        page = self.index
+        while True:
+            soup = self.index_to_soup(''.join([self.base_url, page]))
+            for itt in soup.findAll('div', attrs={'class':'yui-u'}):
+                itema = itt.find('a', href=True, attrs={'class':'ts'})
+                if itema is not None:
+                    itemd = itt.find('div', text=True, attrs={'class':'text'})
+                    newsarticles.append({
+                         'title'      :itema.string
+                        ,'date'       :strftime(self.timefmt)
+                        ,'url'        :itema['href']
+                        ,'description':itemd.string
+                        })
+
+            nextpage = soup.find('div', attrs={'class':'pagination_top'}).find('li', attrs={'class':'next'})
+            if nextpage is not None:
+                page = nextpage.find('a', href=True)['href']
+            else:
+                break
+
+        feeds.append((topic, newsarticles))
+        return feeds
+

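parse_index above paginates through paper.li's '~list' view: it collects every div.yui-u story block on a page, then follows the li.next link inside div.pagination_top until no next page is left. Read in isolation, the pagination loop amounts to the following sketch (iter_pages is a hypothetical helper, not part of the recipe; it assumes the same markup and the index_to_soup helper from BasicNewsRecipe):

    def iter_pages(recipe, base_url, first_page):
        # Yield one parsed page at a time, following paper.li's 'next'
        # link until it disappears.
        page = first_page
        while True:
            soup = recipe.index_to_soup(base_url + page)
            yield soup
            nav = soup.find('div', attrs={'class':'pagination_top'})
            nextpage = nav.find('li', attrs={'class':'next'}) if nav else None
            if nextpage is None:
                break
            page = nextpage.find('a', href=True)['href']

Unlike the recipe, the sketch tolerates a page with no pagination bar at all; the recipe as written would raise an AttributeError there.
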
=== added file 'resources/recipes/paperli_topic.recipe'
--- resources/recipes/paperli_topic.recipe 1970-01-01 00:00:00 +0000
+++ resources/recipes/paperli_topic.recipe 2010-12-12 13:51:27 +0000
@@ -0,0 +1,59 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
+'''
+paperli
+'''
+
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre import strftime
+import re
+
+class paperli_topics(BasicNewsRecipe):
+#-------------------please change here ----------------
+    paperli_tag = 'wikileaks'
+    title = u'The # wikileaks Daily - paperli'
+#-------------------------------------------------------------
+    __author__ = 'Hiroshi Miura'
+    oldest_article = 7
+    max_articles_per_feed = 100
+    description = 'paper.li page about '+ paperli_tag
+    publisher = 'paper.li'
+    category = 'paper.li'
+    language = 'en'
+    encoding = 'utf-8'
+    remove_javascript = True
+    masthead_title = u'The '+ paperli_tag +' Daily'
+    timefmt = '[%y/%m/%d]'
+    base_url = 'http://paper.li'
+    index = base_url+'/tag/'+paperli_tag
+
+
+    def parse_index(self):
+
+        # get topics
+        topics = []
+        soup = self.index_to_soup(self.index)
+        topics_lists = soup.find('div', attrs={'class':'paper-nav-bottom'})
+        for item in topics_lists.findAll('li', attrs={'class':""}):
+            itema = item.find('a', href=True)
+            topics.append({'title': itema.string, 'url': itema['href']})
+
+        # get feeds
+        feeds = []
+        for topic in topics:
+            newsarticles = []
+            soup = self.index_to_soup(''.join([self.base_url, topic['url'] ]))
+            topstories = soup.findAll('div', attrs={'class':'yui-u'})
+            for itt in topstories:
+                itema = itt.find('a', href=True, attrs={'class':'ts'})
+                if itema is not None:
+                    itemd = itt.find('div', text=True, attrs={'class':'text'})
+                    newsarticles.append({
+                         'title'      :itema.string
+                        ,'date'       :strftime(self.timefmt)
+                        ,'url'        :itema['href']
+                        ,'description':itemd.string
+                        })
+            feeds.append((topic['title'], newsarticles))
+        return feeds
+

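Both paper.li recipes return the structure calibre expects from parse_index: a list of (section title, article list) pairs, each article being a dict with title, url, date, and description keys. Schematically, with hypothetical values:

    feeds = [
        (u'Technology', [                    # one (title, articles) pair per topic
            {'title'      : u'Some headline',            # <a class="ts"> text
             'date'       : '[10/12/12]',                # strftime(self.timefmt)
             'url'        : 'http://example.com/story',  # hypothetical URL
             'description': u'Teaser text'},             # <div class="text"> text
        ]),
    ]
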
=== added file 'resources/recipes/uninohimitu.recipe'
--- resources/recipes/uninohimitu.recipe 1970-01-01 00:00:00 +0000
+++ resources/recipes/uninohimitu.recipe 2010-12-12 13:51:27 +0000
@@ -0,0 +1,36 @@
+__license__ = 'GPL v3'
+__copyright__ = '2010, Hiroshi Miura <miurahr@linux.com>'
+'''
+http://ameblo.jp/sauta19/
+'''
+
+import re
+from calibre.web.feeds.news import BasicNewsRecipe
+
+class UniNoHimituKichiBlog(BasicNewsRecipe):
+    title = u'Uni secret base'
+    __author__ = 'Hiroshi Miura'
+    oldest_article = 2
+    publication_type = 'blog'
+    max_articles_per_feed = 20
+    description = 'Japanese famous cat blog'
+    publisher = ''
+    category = 'cat, pet, japan'
+    language = 'ja'
+    encoding = 'utf-8'
+
+    feeds = [(u'blog', u'http://feedblog.ameba.jp/rss/ameblo/sauta19/rss20.xml')]
+
+    def parse_feeds(self):
+        feeds = BasicNewsRecipe.parse_feeds(self)
+        for curfeed in feeds:
+            delList = []
+            for a,curarticle in enumerate(curfeed.articles):
+                if re.search(r'rssad\.jp', curarticle.url):
+                    delList.append(curarticle)
+            if len(delList)>0:
+                for d in delList:
+                    index = curfeed.articles.index(d)
+                    curfeed.articles[index:index+1] = []
+        return feeds
+
