Merge lp:~tomek3d/calibre/kalibrator into lp:calibre

Proposed by Tomasz Długosz
Status: Merged
Merged at revision: 13703
Proposed branch: lp:~tomek3d/calibre/kalibrator
Merge into: lp:calibre
Diff against target: 288 lines (+252/-0)
5 files modified
recipes/antyweb.recipe (+49/-0)
recipes/bankier_pl.recipe (+51/-0)
recipes/f1_ultra.recipe (+35/-0)
recipes/myapple_pl.recipe (+50/-0)
recipes/telepolis_pl.recipe (+67/-0)
To merge this branch: bzr merge lp:~tomek3d/calibre/kalibrator
Reviewer | Review Type | Date Requested | Status
Kovid Goyal | (none) | (none) | Pending
Review via email: mp+134812@code.launchpad.net
To post a comment you must log in.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== added file 'recipes/antyweb.recipe'
2--- recipes/antyweb.recipe 1970-01-01 00:00:00 +0000
3+++ recipes/antyweb.recipe 2012-11-18 17:55:22 +0000
4@@ -0,0 +1,49 @@
5+import re
6+
7+from calibre.web.feeds.news import BasicNewsRecipe
8+
class AntywebRecipe(BasicNewsRecipe):
    """Calibre news recipe that builds an e-book from the Antyweb feed."""

    __license__ = 'GPL v3'
    __author__ = u'Artur Stachecki <artur.stachecki@gmail.com>'
    version = 1

    # Publication metadata.
    title = u'Antyweb'
    description = u'Blog o internecie i nowych technologiach'
    category = u'News'
    language = 'pl'
    encoding = 'utf-8'
    cover_url = ''

    # Download limits.
    oldest_article = 1
    max_articles_per_feed = 100
    simultaneous_downloads = 3
    remove_empty_feeds = True
    use_embedded_content = False

    # Clean-up is done by the explicit tag rules below, not auto_cleanup.
    auto_cleanup = False
    no_stylesheets = True
    remove_javascript = True

    # Only the article headline and the article body are kept.
    keep_only_tags = [
        dict(name='h1', attrs={'class': 'mm-article-title'}),
        dict(name='div', attrs={'class': 'mm-article-content'}),
    ]

    remove_tags = [
        dict(name='h2', attrs={'class': 'widgettitle'}),
        dict(name='img', attrs={'class': 'alignleft'}),
        # The value below is inline CSS, so it must be compared with the
        # ``style`` attribute. It used to be compared with ``class``, which
        # can never equal a CSS declaration list, so the rule never fired.
        dict(name='div', attrs={'style': 'float: right;margin-left:1em;margin-bottom: 0.5em;padding-bottom: 3px; width: 72px;'}),
        dict(name='img', attrs={'src': 'http://antyweb.pl/wp-content/uploads/2011/09/HOSTERSI_testy_pasek600x30.gif'}),
        dict(name='div', attrs={'class': 'podwpisowe'}),
    ]

    extra_css = '''
        body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
    '''

    feeds = [
        (u'Artykuly', u'feed://feeds.feedburner.com/Antyweb?format=xml'),
    ]

    def preprocess_html(self, soup):
        """Replace text-only links with their plain text.

        Anchors whose ``string`` is ``None`` (for example links wrapping
        other tags) are left untouched.

        :param soup: parsed article document.
        :return: the same ``soup`` object, modified in place.
        """
        for anchor in soup.findAll('a'):
            label = anchor.string
            if label is not None:
                anchor.replaceWith(label)
        return soup
54
55=== added file 'recipes/bankier_pl.recipe'
56--- recipes/bankier_pl.recipe 1970-01-01 00:00:00 +0000
57+++ recipes/bankier_pl.recipe 2012-11-18 17:55:22 +0000
58@@ -0,0 +1,51 @@
59+#!/usr/bin/env python
60+
61+__license__ = 'GPL v3'
62+__author__ = 'teepel <teepel44@gmail.com>'
63+
64+'''
65+bankier.pl
66+'''
67+
68+from calibre.web.feeds.news import BasicNewsRecipe
69+import re
70+
class bankier(BasicNewsRecipe):
    """Calibre news recipe for the Bankier.pl feeds.

    Articles are downloaded through the site's print page; see
    ``print_version``.
    """

    __author__ = 'teepel <teepel44@gmail.com>'

    # Publication metadata.
    title = u'Bankier.pl'
    description = 'Polski portal finansowy. Informacje o: gospodarka, inwestowanie, finanse osobiste, prowadzenie firmy, kursy walut, notowania akcji, fundusze.'
    language = 'pl'
    masthead_url = 'http://www.bankier.pl/gfx/hd-mid-02.gif'
    INDEX = 'http://bankier.pl/'

    # Download limits.
    oldest_article = 1
    max_articles_per_feed = 100
    simultaneous_downloads = 5
    remove_empty_feeds = True

    remove_javascript = True
    no_stylesheets = True

    keep_only_tags = [
        dict(name='div', attrs={'align': 'left'}),
    ]

    remove_tags = [
        dict(name='table', attrs={'cellspacing': '2'}),
        dict(name='div', attrs={'align': 'center'}),
        dict(name='img', attrs={'src': '/gfx/hd-mid-02.gif'}),
    ]

    feeds = [
        (u'Wiadomości dnia', u'http://feeds.feedburner.com/bankier-wiadomosci-dnia'),
        (u'Finanse osobiste', u'http://feeds.feedburner.com/bankier-finanse-osobiste'),
        (u'Firma', u'http://feeds.feedburner.com/bankier-firma'),
        (u'Giełda', u'http://feeds.feedburner.com/bankier-gielda'),
        (u'Rynek walutowy', u'http://feeds.feedburner.com/bankier-rynek-walutowy'),
        (u'Komunikaty ze spółek', u'http://feeds.feedburner.com/bankier-espi'),
    ]

    def print_version(self, url):
        """Return the print-page URL for the article at ``url``.

        The article id is taken to be whatever follows the last ``-`` in the
        final path component, with the file extension, query string and
        fragment removed, e.g. ``.../Some-title-1234567.html`` gives
        ``1234567``.

        The id is read from the last path component rather than from a fixed
        dot-separated position in the whole URL, so the result does not
        depend on how many dots the host name contains.

        :param url: article URL taken from the feed.
        :return: the print-page URL, or ``url`` unchanged when no id can be
            extracted from it.
        """
        # Strip fragment and query string first so they cannot leak into
        # the extracted id.
        path = url.split('#', 1)[0].split('?', 1)[0]
        file_name = path.rstrip('/').rsplit('/', 1)[-1]
        stem = file_name.rsplit('.', 1)[0]
        _, dash, article_id = stem.rpartition('-')
        if not dash or not article_id:
            # No "<title>-<id>" shape: fall back to the regular page.
            return url
        return 'http://www.bankier.pl/wiadomosci/print.html?article_id=' + article_id
109+
110\ No newline at end of file
111
112=== added file 'recipes/f1_ultra.recipe'
113--- recipes/f1_ultra.recipe 1970-01-01 00:00:00 +0000
114+++ recipes/f1_ultra.recipe 2012-11-18 17:55:22 +0000
115@@ -0,0 +1,35 @@
116+from calibre.web.feeds.news import BasicNewsRecipe
117+import re
118+
class f1ultra(BasicNewsRecipe):
    """Calibre news recipe for the F1 Ultra feed (f1ultra.pl)."""

    __license__ = 'GPL v3'
    __author__ = 'MrStefan <mrstefaan@gmail.com>, Artur Stachecki <artur.stachecki@gmail.com>'

    # Publication metadata.
    title = u'Formuła 1 - F1 ultra'
    description = u'Formuła 1, Robert Kubica, F3, GP2 oraz inne serie wyścigowe.'
    language = 'pl'
    masthead_url = 'http://www.f1ultra.pl/templates/f1ultra/images/logo.gif'

    # Download limits.
    oldest_article = 1
    max_articles_per_feed = 100
    remove_empty_feeds = True

    remove_javascript = True
    no_stylesheets = True

    keep_only_tags = [
        dict(name='div', attrs={'id': 'main'}),
    ]
    remove_tags_after = [
        dict(attrs={'style': 'margin-top:5px;margin-bottom:5px;display: inline;'}),
    ]
    remove_tags = [
        dict(attrs={'class': ['buttonheading', 'avPlayerContainer', 'createdate']}),
        dict(attrs={'title': ['PDF', 'Drukuj', 'Email']}),
        dict(name='form', attrs={'method': 'post'}),
        dict(name='hr', attrs={'size': '2'}),
    ]

    # Applied to the raw HTML before parsing; every match is deleted.
    preprocess_regexps = [
        (re.compile(r'align="left"'), lambda match: ''),
        (re.compile(r'align="right"'), lambda match: ''),
        # Match the whole attribute including its value. The previous
        # pattern (``"*`` = zero or more quote characters) only matched
        # ``width="`` and left a dangling ``100"`` behind in the markup.
        (re.compile(r'width="[^"]*"'), lambda match: ''),
        (re.compile(r'<table .*?>'), lambda match: ''),
    ]

    extra_css = '''.contentheading { font-size: 1.4em; font-weight: bold; }
        img { display: block; clear: both;}
    '''
    remove_attributes = ['width', 'height', 'position', 'float', 'padding-left', 'padding-right', 'padding', 'text-align']

    feeds = [
        (u'F1 Ultra', u'http://www.f1ultra.pl/index.php?option=com_rd_rss&id=1&Itemid=245'),
    ]
151
152=== added file 'recipes/icons/antyweb.png'
153Binary files recipes/icons/antyweb.png 1970-01-01 00:00:00 +0000 and recipes/icons/antyweb.png 2012-11-18 17:55:22 +0000 differ
154=== added file 'recipes/icons/bankier_pl.png'
155Binary files recipes/icons/bankier_pl.png 1970-01-01 00:00:00 +0000 and recipes/icons/bankier_pl.png 2012-11-18 17:55:22 +0000 differ
156=== added file 'recipes/icons/f1_ultra.png'
157Binary files recipes/icons/f1_ultra.png 1970-01-01 00:00:00 +0000 and recipes/icons/f1_ultra.png 2012-11-18 17:55:22 +0000 differ
158=== added file 'recipes/icons/myapple_pl.png'
159Binary files recipes/icons/myapple_pl.png 1970-01-01 00:00:00 +0000 and recipes/icons/myapple_pl.png 2012-11-18 17:55:22 +0000 differ
160=== added file 'recipes/icons/telepolis_pl.png'
161Binary files recipes/icons/telepolis_pl.png 1970-01-01 00:00:00 +0000 and recipes/icons/telepolis_pl.png 2012-11-18 17:55:22 +0000 differ
162=== added file 'recipes/myapple_pl.recipe'
163--- recipes/myapple_pl.recipe 1970-01-01 00:00:00 +0000
164+++ recipes/myapple_pl.recipe 2012-11-18 17:55:22 +0000
165@@ -0,0 +1,50 @@
166+import re
167+
168+from calibre.web.feeds.news import BasicNewsRecipe
169+
class MyAppleRecipe(BasicNewsRecipe):
    """Calibre news recipe that builds an e-book from the MyApple.pl feed."""

    __license__ = 'GPL v3'
    __author__ = u'Artur Stachecki <artur.stachecki@gmail.com>'
    version = 1

    # Publication metadata. The description no longer starts with a stray
    # space.
    title = u'MyApple.pl'
    description = u'Największy w Polsce serwis zajmujący się tematyką związaną z Apple i wszelkimi produktami tej firmy.'
    category = u'News'
    language = 'pl'
    cover_url = ''

    # Download limits.
    oldest_article = 7
    max_articles_per_feed = 100000
    simultaneous_downloads = 3
    recursions = 0
    remove_empty_feeds = True

    # ``no_stylesheets`` used to be assigned twice; once is enough.
    no_stylesheets = True
    remove_javascript = True

    keep_only_tags = [
        dict(name='div', attrs={'id': 'article_content'}),
    ]

    remove_tags = [
        dict(name='div', attrs={'class': 'article_author_date_comment_container'}),
        dict(name='div', attrs={'class': 'fullwidth'}),
        dict(name='div', attrs={'class': 'cmslinks'}),
        dict(name='div', attrs={'class': 'googleads-468'}),
        dict(name='div', attrs={'id': 'comments'}),
    ]

    extra_css = '''
        body {font-family: verdana, arial, helvetica, geneva, sans-serif ;}
        td.contentheading{font-size: large; font-weight: bold;}
    '''

    feeds = [
        ('News', 'feed://myapple.pl/external.php?do=rss&type=newcontent&sectionid=1&days=120&count=10'),
    ]

    def preprocess_html(self, soup):
        """Replace text-only links with their plain text.

        Anchors whose ``string`` is ``None`` (for example links wrapping
        other tags) are left untouched.

        :param soup: parsed article document.
        :return: the same ``soup`` object, modified in place.
        """
        for anchor in soup.findAll('a'):
            label = anchor.string
            if label is not None:
                anchor.replaceWith(label)
        return soup
216\ No newline at end of file
217
218=== added file 'recipes/telepolis_pl.recipe'
219--- recipes/telepolis_pl.recipe 1970-01-01 00:00:00 +0000
220+++ recipes/telepolis_pl.recipe 2012-11-18 17:55:22 +0000
221@@ -0,0 +1,67 @@
222+#!/usr/bin/env python
223+
224+__license__ = 'GPL v3'
225+
226+from calibre.web.feeds.news import BasicNewsRecipe
227+import re
228+
229+
class telepolis(BasicNewsRecipe):
    """Calibre news recipe for the Telepolis.pl news and article feeds.

    Articles are downloaded through the site's print pages; see
    ``print_version``.
    """

    __author__ = 'Artur Stachecki <artur.stachecki@gmail.com>'

    # Publication metadata. The description is built with implicit string
    # concatenation so that source indentation does not end up inside it
    # (a backslash continuation inside the literal keeps that whitespace).
    title = u'Telepolis.pl'
    description = (u'Twój telekomunikacyjny serwis informacyjny. '
                   u'Codzienne informacje, testy i artykuły, '
                   u'promocje, baza telefonów oraz centrum rozrywki')
    language = 'pl'
    masthead_url = 'http://telepolis.pl/i/telepolis-logo2.gif'

    # Download limits.
    oldest_article = 7
    max_articles_per_feed = 100
    simultaneous_downloads = 5
    use_embedded_content = False

    remove_javascript = True
    no_stylesheets = True

    remove_tags = [
        dict(attrs={'alt': 'TELEPOLIS.pl'}),
    ]

    # Applied to the raw HTML before parsing; every match is deleted.
    preprocess_regexps = [
        (re.compile(r'<: .*? :>'), lambda match: ''),
        (re.compile(r'<b>Zobacz:</b>.*?</a>', re.DOTALL), lambda match: ''),
        (re.compile(r'<-ankieta.*?>'), lambda match: ''),
        (re.compile(r'\(Q\!\)'), lambda match: ''),
        (re.compile(r'\(plik.*?\)'), lambda match: ''),
        (re.compile(r'<br.*?><br.*?>', re.DOTALL), lambda match: ''),
    ]

    extra_css = '''.tb { font-weight: bold; font-size: 20px;}'''

    feeds = [
        (u'Wiadomości', u'http://www.telepolis.pl/rss/news.php'),
        (u'Artykuły', u'http://www.telepolis.pl/rss/artykuly.php'),
    ]

    def print_version(self, url):
        """Return the print-page URL for the article at ``url``.

        ``news.php`` is rewritten to ``news_print.php``; in any other URL
        ``artykuly.php`` is rewritten to ``art_print.php``. A URL containing
        neither is returned unchanged.

        :param url: article URL taken from the feed.
        :return: the rewritten URL.
        """
        if 'news.php' in url:
            return url.replace('news.php', 'news_print.php')
        return url.replace('artykuly.php', 'art_print.php')

    def preprocess_html(self, soup):
        """Clean up a downloaded print page before conversion.

        * Image URLs containing ``m.jpg`` are rewritten to ``.jpg``
          (presumably thumbnail -> full-size; confirm against the site).
        * The first ``<tr>`` of the document is removed (the original code
          called it ``logo``).
        * Every ``<tr>`` whose text contains one of the footer markers is
          removed.

        :param soup: parsed article document.
        :return: the result of ``self.adeify_images(soup)``.
        """
        for image in soup.findAll('img'):
            # An <img> without a src attribute must not abort the article.
            src = image.get('src')
            if src and 'm.jpg' in src:
                image['src'] = src.replace('m.jpg', '.jpg')

        # The page may contain no table rows at all.
        first_row = soup.find('tr')
        if first_row is not None:
            first_row.extract()

        # Unicode literals: the row text is unicode, and comparing it with a
        # non-ASCII byte string raises UnicodeDecodeError on Python 2.
        footer_markers = (u'Wiadomość wydrukowana', u'copyright')
        for row in soup.findAll('tr'):
            row_text = self.tag_to_string(row)
            if any(marker in row_text for marker in footer_markers):
                row.extract()

        return self.adeify_images(soup)

Subscribers

People subscribed via source and target branches