Merge lp:~tomek3d/calibre/kalibrator into lp:calibre
- kalibrator
- Merge into trunk
Proposed by
Tomasz Długosz
Status: | Merged |
---|---|
Merged at revision: | 13703 |
Proposed branch: | lp:~tomek3d/calibre/kalibrator |
Merge into: | lp:calibre |
Diff against target: |
288 lines (+252/-0) 5 files modified
recipes/antyweb.recipe (+49/-0) recipes/bankier_pl.recipe (+51/-0) recipes/f1_ultra.recipe (+35/-0) recipes/myapple_pl.recipe (+50/-0) recipes/telepolis_pl.recipe (+67/-0) |
To merge this branch: | bzr merge lp:~tomek3d/calibre/kalibrator |
Related bugs: |
Reviewer | Review Type | Date Requested | Status |
---|---|---|---|
Kovid Goyal | Pending | ||
Review via email: mp+134812@code.launchpad.net |
Commit message
Description of the change
To post a comment you must log in.
Preview Diff
[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1 | === added file 'recipes/antyweb.recipe' |
2 | --- recipes/antyweb.recipe 1970-01-01 00:00:00 +0000 |
3 | +++ recipes/antyweb.recipe 2012-11-18 17:55:22 +0000 |
4 | @@ -0,0 +1,49 @@ |
5 | +import re |
6 | + |
7 | +from calibre.web.feeds.news import BasicNewsRecipe |
8 | + |
9 | +class AntywebRecipe(BasicNewsRecipe): |
10 | + encoding = 'utf-8' |
11 | + __license__ = 'GPL v3' |
12 | + __author__ = u'Artur Stachecki <artur.stachecki@gmail.com>' |
13 | + language = 'pl' |
14 | + version = 1 |
15 | + title = u'Antyweb' |
16 | + category = u'News' |
17 | + description = u'Blog o internecie i nowych technologiach' |
18 | + cover_url='' |
19 | + remove_empty_feeds= True |
20 | + auto_cleanup = False |
21 | + no_stylesheets=True |
22 | + use_embedded_content = False |
23 | + oldest_article = 1 |
24 | + max_articles_per_feed = 100 |
25 | + remove_javascript = True |
26 | + simultaneous_downloads = 3 |
27 | + |
28 | + keep_only_tags =[] |
29 | + keep_only_tags.append(dict(name = 'h1', attrs = { 'class' : 'mm-article-title'})) |
30 | + keep_only_tags.append(dict(name = 'div', attrs = {'class' : 'mm-article-content'})) |
31 | + |
32 | + |
33 | + remove_tags =[] |
34 | + remove_tags.append(dict(name = 'h2', attrs = {'class' : 'widgettitle'})) |
35 | + remove_tags.append(dict(name = 'img', attrs = {'class' : 'alignleft'})) |
36 | + remove_tags.append(dict(name = 'div', attrs = {'class' : 'float: right;margin-left:1em;margin-bottom: 0.5em;padding-bottom: 3px; width: 72px;'})) |
37 | + remove_tags.append(dict(name = 'img', attrs = {'src' : 'http://antyweb.pl/wp-content/uploads/2011/09/HOSTERSI_testy_pasek600x30.gif'})) |
38 | + remove_tags.append(dict(name = 'div', attrs = {'class' : 'podwpisowe'})) |
39 | + |
40 | + |
41 | + extra_css = ''' |
42 | + body {font-family: verdana, arial, helvetica, geneva, sans-serif ;} |
43 | + ''' |
44 | + |
45 | + feeds = [ |
46 | + (u'Artykuly', u'feed://feeds.feedburner.com/Antyweb?format=xml'), |
47 | + ] |
48 | + def preprocess_html(self, soup): |
49 | + for alink in soup.findAll('a'): |
50 | + if alink.string is not None: |
51 | + tstr = alink.string |
52 | + alink.replaceWith(tstr) |
53 | + return soup |
54 | |
55 | === added file 'recipes/bankier_pl.recipe' |
56 | --- recipes/bankier_pl.recipe 1970-01-01 00:00:00 +0000 |
57 | +++ recipes/bankier_pl.recipe 2012-11-18 17:55:22 +0000 |
58 | @@ -0,0 +1,51 @@ |
59 | +#!/usr/bin/env python |
60 | + |
61 | +__license__ = 'GPL v3' |
62 | +__author__ = 'teepel <teepel44@gmail.com>' |
63 | + |
64 | +''' |
65 | +bankier.pl |
66 | +''' |
67 | + |
68 | +from calibre.web.feeds.news import BasicNewsRecipe |
69 | +import re |
70 | + |
71 | +class bankier(BasicNewsRecipe): |
72 | + title = u'Bankier.pl' |
73 | + __author__ = 'teepel <teepel44@gmail.com>' |
74 | + language = 'pl' |
75 | + description ='Polski portal finansowy. Informacje o: gospodarka, inwestowanie, finanse osobiste, prowadzenie firmy, kursy walut, notowania akcji, fundusze.' |
76 | + masthead_url='http://www.bankier.pl/gfx/hd-mid-02.gif' |
77 | + INDEX='http://bankier.pl/' |
78 | + remove_empty_feeds= True |
79 | + oldest_article = 1 |
80 | + max_articles_per_feed = 100 |
81 | + remove_javascript=True |
82 | + no_stylesheets=True |
83 | + simultaneous_downloads = 5 |
84 | + |
85 | + keep_only_tags =[] |
86 | + keep_only_tags.append(dict(name = 'div', attrs = {'align' : 'left'})) |
87 | + |
88 | + remove_tags =[] |
89 | + remove_tags.append(dict(name = 'table', attrs = {'cellspacing' : '2'})) |
90 | + remove_tags.append(dict(name = 'div', attrs = {'align' : 'center'})) |
91 | + remove_tags.append(dict(name = 'img', attrs = {'src' : '/gfx/hd-mid-02.gif'})) |
92 | + #remove_tags.append(dict(name = 'a', attrs = {'target' : '_blank'})) |
93 | + #remove_tags.append(dict(name = 'br', attrs = {'clear' : 'all'})) |
94 | + |
95 | + feeds = [ |
96 | + (u'Wiadomości dnia', u'http://feeds.feedburner.com/bankier-wiadomosci-dnia'), |
97 | + (u'Finanse osobiste', u'http://feeds.feedburner.com/bankier-finanse-osobiste'), |
98 | + (u'Firma', u'http://feeds.feedburner.com/bankier-firma'), |
99 | + (u'Giełda', u'http://feeds.feedburner.com/bankier-gielda'), |
100 | + (u'Rynek walutowy', u'http://feeds.feedburner.com/bankier-rynek-walutowy'), |
101 | + (u'Komunikaty ze spółek', u'http://feeds.feedburner.com/bankier-espi'), |
102 | + ] |
103 | + def print_version(self, url): |
104 | + segment = url.split('.') |
105 | + urlPart = segment[2] |
106 | + segments = urlPart.split('-') |
107 | + urlPart2 = segments[-1] |
108 | + return 'http://www.bankier.pl/wiadomosci/print.html?article_id=' + urlPart2 |
109 | + |
110 | \ No newline at end of file |
111 | |
112 | === added file 'recipes/f1_ultra.recipe' |
113 | --- recipes/f1_ultra.recipe 1970-01-01 00:00:00 +0000 |
114 | +++ recipes/f1_ultra.recipe 2012-11-18 17:55:22 +0000 |
115 | @@ -0,0 +1,35 @@ |
116 | +from calibre.web.feeds.news import BasicNewsRecipe |
117 | +import re |
118 | + |
119 | +class f1ultra(BasicNewsRecipe): |
120 | + title = u'Formuła 1 - F1 ultra' |
121 | + __license__ = 'GPL v3' |
122 | + __author__ = 'MrStefan <mrstefaan@gmail.com>, Artur Stachecki <artur.stachecki@gmail.com>' |
123 | + language = 'pl' |
124 | + description =u'Formuła 1, Robert Kubica, F3, GP2 oraz inne serie wyścigowe.' |
125 | + masthead_url='http://www.f1ultra.pl/templates/f1ultra/images/logo.gif' |
126 | + remove_empty_feeds= True |
127 | + oldest_article = 1 |
128 | + max_articles_per_feed = 100 |
129 | + remove_javascript=True |
130 | + no_stylesheets=True |
131 | + |
132 | + keep_only_tags =[(dict(name = 'div', attrs = {'id' : 'main'}))] |
133 | + remove_tags_after =[dict(attrs = {'style' : 'margin-top:5px;margin-bottom:5px;display: inline;'})] |
134 | + remove_tags =[(dict(attrs = {'class' : ['buttonheading', 'avPlayerContainer', 'createdate']}))] |
135 | + remove_tags.append(dict(attrs = {'title' : ['PDF', 'Drukuj', 'Email']})) |
136 | + remove_tags.append(dict(name = 'form', attrs = {'method' : 'post'})) |
137 | + remove_tags.append(dict(name = 'hr', attrs = {'size' : '2'})) |
138 | + |
139 | + preprocess_regexps = [(re.compile(r'align="left"'), lambda match: ''), |
140 | + (re.compile(r'align="right"'), lambda match: ''), |
141 | + (re.compile(r'width=\"*\"'), lambda match: ''), |
142 | + (re.compile(r'\<table .*?\>'), lambda match: '')] |
143 | + |
144 | + |
145 | + extra_css = '''.contentheading { font-size: 1.4em; font-weight: bold; } |
146 | + img { display: block; clear: both;} |
147 | + ''' |
148 | + remove_attributes = ['width','height','position','float','padding-left','padding-right','padding','text-align'] |
149 | + |
150 | + feeds = [(u'F1 Ultra', u'http://www.f1ultra.pl/index.php?option=com_rd_rss&id=1&Itemid=245')] |
151 | |
152 | === added file 'recipes/icons/antyweb.png' |
153 | Binary files recipes/icons/antyweb.png 1970-01-01 00:00:00 +0000 and recipes/icons/antyweb.png 2012-11-18 17:55:22 +0000 differ |
154 | === added file 'recipes/icons/bankier_pl.png' |
155 | Binary files recipes/icons/bankier_pl.png 1970-01-01 00:00:00 +0000 and recipes/icons/bankier_pl.png 2012-11-18 17:55:22 +0000 differ |
156 | === added file 'recipes/icons/f1_ultra.png' |
157 | Binary files recipes/icons/f1_ultra.png 1970-01-01 00:00:00 +0000 and recipes/icons/f1_ultra.png 2012-11-18 17:55:22 +0000 differ |
158 | === added file 'recipes/icons/myapple_pl.png' |
159 | Binary files recipes/icons/myapple_pl.png 1970-01-01 00:00:00 +0000 and recipes/icons/myapple_pl.png 2012-11-18 17:55:22 +0000 differ |
160 | === added file 'recipes/icons/telepolis_pl.png' |
161 | Binary files recipes/icons/telepolis_pl.png 1970-01-01 00:00:00 +0000 and recipes/icons/telepolis_pl.png 2012-11-18 17:55:22 +0000 differ |
162 | === added file 'recipes/myapple_pl.recipe' |
163 | --- recipes/myapple_pl.recipe 1970-01-01 00:00:00 +0000 |
164 | +++ recipes/myapple_pl.recipe 2012-11-18 17:55:22 +0000 |
165 | @@ -0,0 +1,50 @@ |
166 | +import re |
167 | + |
168 | +from calibre.web.feeds.news import BasicNewsRecipe |
169 | + |
170 | +class MyAppleRecipe(BasicNewsRecipe): |
171 | + __license__ = 'GPL v3' |
172 | + __author__ = u'Artur Stachecki <artur.stachecki@gmail.com>' |
173 | + language = 'pl' |
174 | + version = 1 |
175 | + |
176 | + title = u'MyApple.pl' |
177 | + category = u'News' |
178 | + description = u' Największy w Polsce serwis zajmujący się tematyką związaną z Apple i wszelkimi produktami tej firmy.' |
179 | + cover_url='' |
180 | + remove_empty_feeds= True |
181 | + no_stylesheets=True |
182 | + oldest_article = 7 |
183 | + max_articles_per_feed = 100000 |
184 | + recursions = 0 |
185 | + |
186 | + no_stylesheets = True |
187 | + remove_javascript = True |
188 | + simultaneous_downloads = 3 |
189 | + |
190 | + keep_only_tags =[] |
191 | + keep_only_tags.append(dict(name = 'div', attrs = {'id' : 'article_content'})) |
192 | + |
193 | + remove_tags =[] |
194 | + remove_tags.append(dict(name = 'div', attrs = {'class' : 'article_author_date_comment_container'})) |
195 | + remove_tags.append(dict(name = 'div', attrs = {'class' : 'fullwidth'})) |
196 | + remove_tags.append(dict(name = 'div', attrs = {'class' : 'cmslinks'})) |
197 | + remove_tags.append(dict(name = 'div', attrs = {'class' : 'googleads-468'})) |
198 | + remove_tags.append(dict(name = 'div', attrs = {'id' : 'comments'})) |
199 | + |
200 | + |
201 | + extra_css = ''' |
202 | + body {font-family: verdana, arial, helvetica, geneva, sans-serif ;} |
203 | + td.contentheading{font-size: large; font-weight: bold;} |
204 | + ''' |
205 | + |
206 | + feeds = [ |
207 | + ('News', 'feed://myapple.pl/external.php?do=rss&type=newcontent&sectionid=1&days=120&count=10'), |
208 | + ] |
209 | + |
210 | + def preprocess_html(self, soup): |
211 | + for alink in soup.findAll('a'): |
212 | + if alink.string is not None: |
213 | + tstr = alink.string |
214 | + alink.replaceWith(tstr) |
215 | + return soup |
216 | \ No newline at end of file |
217 | |
218 | === added file 'recipes/telepolis_pl.recipe' |
219 | --- recipes/telepolis_pl.recipe 1970-01-01 00:00:00 +0000 |
220 | +++ recipes/telepolis_pl.recipe 2012-11-18 17:55:22 +0000 |
221 | @@ -0,0 +1,67 @@ |
222 | +#!/usr/bin/env python |
223 | + |
224 | +__license__ = 'GPL v3' |
225 | + |
226 | +from calibre.web.feeds.news import BasicNewsRecipe |
227 | +import re |
228 | + |
229 | + |
230 | +class telepolis(BasicNewsRecipe): |
231 | + title = u'Telepolis.pl' |
232 | + __author__ = 'Artur Stachecki <artur.stachecki@gmail.com>' |
233 | + language = 'pl' |
234 | + description = u'Twój telekomunikacyjny serwis informacyjny.\ |
235 | + Codzienne informacje, testy i artykuły,\ |
236 | + promocje, baza telefonów oraz centrum rozrywki' |
237 | + oldest_article = 7 |
238 | + masthead_url = 'http://telepolis.pl/i/telepolis-logo2.gif' |
239 | + max_articles_per_feed = 100 |
240 | + simultaneous_downloads = 5 |
241 | + remove_javascript = True |
242 | + no_stylesheets = True |
243 | + use_embedded_content = False |
244 | + |
245 | + remove_tags = [] |
246 | + remove_tags.append(dict(attrs={'alt': 'TELEPOLIS.pl'})) |
247 | + |
248 | + preprocess_regexps = [(re.compile(r'<: .*? :>'), |
249 | + lambda match: ''), |
250 | + (re.compile(r'<b>Zobacz:</b>.*?</a>', re.DOTALL), |
251 | + lambda match: ''), |
252 | + (re.compile(r'<-ankieta.*?>'), |
253 | + lambda match: ''), |
254 | + (re.compile(r'\(Q\!\)'), |
255 | + lambda match: ''), |
256 | + (re.compile(r'\(plik.*?\)'), |
257 | + lambda match: ''), |
258 | + (re.compile(r'<br.*?><br.*?>', re.DOTALL), |
259 | + lambda match: '') |
260 | + ] |
261 | + |
262 | + extra_css = '''.tb { font-weight: bold; font-size: 20px;}''' |
263 | + |
264 | + feeds = [ |
265 | + (u'Wiadomości', u'http://www.telepolis.pl/rss/news.php'), |
266 | + (u'Artykuły', u'http://www.telepolis.pl/rss/artykuly.php') |
267 | + ] |
268 | + |
269 | + def print_version(self, url): |
270 | + if 'news.php' in url: |
271 | + print_url = url.replace('news.php', 'news_print.php') |
272 | + else: |
273 | + print_url = url.replace('artykuly.php', 'art_print.php') |
274 | + return print_url |
275 | + |
276 | + def preprocess_html(self, soup): |
277 | + for image in soup.findAll('img'): |
278 | + if 'm.jpg' in image['src']: |
279 | + image_big = image['src'] |
280 | + image_big = image_big.replace('m.jpg', '.jpg') |
281 | + image['src'] = image_big |
282 | + logo = soup.find('tr') |
283 | + logo.extract() |
284 | + for tag in soup.findAll('tr'): |
285 | + for strings in ['Wiadomość wydrukowana', 'copyright']: |
286 | + if strings in self.tag_to_string(tag): |
287 | + tag.extract() |
288 | + return self.adeify_images(soup) |