Merge lp:~tomek3d/calibre/kalibrator into lp:calibre

Proposed by Tomasz Długosz
Status: Merged
Merged at revision: 14541
Proposed branch: lp:~tomek3d/calibre/kalibrator
Merge into: lp:calibre
Diff against target: 2089 lines (+1239/-180)
56 files modified
recipes/adventure_zone_pl.recipe (+2/-2)
recipes/archeowiesci.recipe (+2/-1)
recipes/astro_news_pl.recipe (+1/-1)
recipes/astroflesz.recipe (+1/-0)
recipes/astronomia_pl.recipe (+1/-1)
recipes/bash_org_pl.recipe (+2/-2)
recipes/benchmark_pl.recipe (+11/-9)
recipes/biweekly.recipe (+51/-0)
recipes/cd_action.recipe (+2/-2)
recipes/computerworld_pl.recipe (+6/-10)
recipes/conowego_pl.recipe (+9/-1)
recipes/czas_gentlemanow.recipe (+4/-2)
recipes/dobreprogamy.recipe (+1/-1)
recipes/dwutygodnik.recipe (+51/-0)
recipes/dzieje_pl.recipe (+2/-2)
recipes/dziennik_baltycki.recipe (+34/-0)
recipes/dziennik_lodzki.recipe (+35/-0)
recipes/dziennik_wschodni.recipe (+78/-0)
recipes/dziennik_zachodni.recipe (+34/-0)
recipes/echo_dnia.recipe (+74/-0)
recipes/eioba.recipe (+1/-0)
recipes/elektroda_pl.recipe (+1/-1)
recipes/emuzica_pl.recipe (+2/-1)
recipes/film_web.recipe (+7/-7)
recipes/focus_pl.recipe (+1/-1)
recipes/fotoblogia_pl.recipe (+2/-1)
recipes/gazeta_krakowska.recipe (+34/-0)
recipes/gazeta_lubuska.recipe (+64/-0)
recipes/gazeta_pomorska.recipe (+84/-95)
recipes/gazeta_wroclawska.recipe (+34/-0)
recipes/gazeta_wspolczesna.recipe (+63/-0)
recipes/gazeta_wyborcza.recipe (+1/-1)
recipes/gcn.recipe (+83/-0)
recipes/glos_wielkopolski.recipe (+34/-0)
recipes/gram_pl.recipe (+4/-5)
recipes/gry_online_pl.recipe (+54/-9)
recipes/jazzpress.recipe (+50/-0)
recipes/konflikty_zbrojne.recipe (+1/-1)
recipes/kosmonauta_pl.recipe (+2/-3)
recipes/kurier_lubelski.recipe (+34/-0)
recipes/kurier_poranny.recipe (+78/-0)
recipes/kurier_szczecinski.recipe (+27/-0)
recipes/lomza.recipe (+1/-1)
recipes/mlody_technik_pl.recipe (+9/-2)
recipes/niebezpiecznik.recipe (+3/-3)
recipes/nowa_fantastyka.recipe (+1/-1)
recipes/nto.recipe (+63/-0)
recipes/pc_foster.recipe (+7/-7)
recipes/polska_times.recipe (+3/-1)
recipes/spiders_web_pl.recipe (+1/-1)
recipes/tablety_pl.recipe (+1/-1)
recipes/tanuki.recipe (+2/-1)
recipes/trojmiasto_pl.recipe (+37/-0)
recipes/tvn24.recipe (+2/-2)
recipes/ubuntu_pl.recipe (+1/-1)
recipes/zycie_warszawy.recipe (+46/-0)
To merge this branch: bzr merge lp:~tomek3d/calibre/kalibrator
Reviewer Review Type Date Requested Status
Kovid Goyal Pending
Review via email: mp+152041@code.launchpad.net

Description of the change

I have two new committers in my git repository. Here is a bunch of recipes submitted by them.

To post a comment you must log in.
Revision history for this message
Kovid Goyal (kovid) wrote :

There are a few recipes in there that call ebook-convert to get an OPF from an EPUB. This will break if ebook-convert is not in the PATH (which happens, for example, on OS X). Instead, they should use the main() function from conversion/cli.py.

I have merged the branch, replacing the ebook-convert call with a simple unzip plus a search for the OPF file, as there is no need to do a full conversion just to unpack the EPUB.

Preview Diff

[H/L] Next/Prev Comment, [J/K] Next/Prev File, [N/P] Next/Prev Hunk
1=== modified file 'recipes/adventure_zone_pl.recipe'
2--- recipes/adventure_zone_pl.recipe 2013-02-16 15:44:46 +0000
3+++ recipes/adventure_zone_pl.recipe 2013-03-06 19:41:20 +0000
4@@ -3,7 +3,7 @@
5 class Adventure_zone(BasicNewsRecipe):
6 title = u'Adventure Zone'
7 __author__ = 'fenuks'
8- description = u'Adventure zone - adventure games from A to Z'
9+ description = u'Czytaj więcej o przygodzie - codzienne nowinki. Szukaj u nas solucji i poradników, czytaj recenzje i zapowiedzi. Także galeria, pliki oraz forum dla wszystkich fanów gier przygodowych.'
10 category = 'games'
11 language = 'pl'
12 no_stylesheets = True
13@@ -78,4 +78,4 @@
14 a['href']=self.index + a['href']
15 return soup
16
17-
18\ No newline at end of file
19+
20
21=== modified file 'recipes/archeowiesci.recipe'
22--- recipes/archeowiesci.recipe 2013-01-25 12:49:36 +0000
23+++ recipes/archeowiesci.recipe 2013-03-06 19:41:20 +0000
24@@ -5,6 +5,7 @@
25 __author__ = 'fenuks'
26 category = 'archeology'
27 language = 'pl'
28+ description = u'Z pasją o przeszłości'
29 cover_url='http://archeowiesci.pl/wp-content/uploads/2011/05/Archeowiesci2-115x115.jpg'
30 oldest_article = 7
31 needs_subscription='optional'
32@@ -29,4 +30,4 @@
33 br['log'] = self.username
34 br['pwd'] = self.password
35 br.submit()
36- return br
37\ No newline at end of file
38+ return br
39
40=== modified file 'recipes/astro_news_pl.recipe'
41--- recipes/astro_news_pl.recipe 2011-12-17 16:02:28 +0000
42+++ recipes/astro_news_pl.recipe 2013-03-06 19:41:20 +0000
43@@ -2,7 +2,7 @@
44 class AstroNEWS(BasicNewsRecipe):
45 title = u'AstroNEWS'
46 __author__ = 'fenuks'
47- description = 'AstroNEWS- astronomy every day'
48+ description = u'AstroNEWS regularnie dostarcza wiadomości o wydarzeniach związanych z astronomią i astronautyką. Informujemy o aktualnych odkryciach i wydarzeniach naukowych, zapowiadamy ciekawe zjawiska astronomiczne. Serwis jest częścią portalu astronomicznego AstroNET prowadzonego przez miłośników astronomii i zawodowych astronomów.'
49 category = 'astronomy, science'
50 language = 'pl'
51 oldest_article = 8
52
53=== modified file 'recipes/astroflesz.recipe'
54--- recipes/astroflesz.recipe 2012-12-14 12:01:34 +0000
55+++ recipes/astroflesz.recipe 2013-03-06 19:41:20 +0000
56@@ -13,6 +13,7 @@
57 max_articles_per_feed = 100
58 no_stylesheets = True
59 use_embedded_content = False
60+ remove_attributes = ['style']
61 keep_only_tags = [dict(id="k2Container")]
62 remove_tags_after = dict(name='div', attrs={'class':'itemLinks'})
63 remove_tags = [dict(name='div', attrs={'class':['itemLinks', 'itemToolbar', 'itemRatingBlock']})]
64
65=== modified file 'recipes/astronomia_pl.recipe'
66--- recipes/astronomia_pl.recipe 2012-02-20 04:34:40 +0000
67+++ recipes/astronomia_pl.recipe 2013-03-06 19:41:20 +0000
68@@ -3,7 +3,7 @@
69 class Astronomia_pl(BasicNewsRecipe):
70 title = u'Astronomia.pl'
71 __author__ = 'fenuks'
72- description = 'Astronomia - polish astronomy site'
73+ description = u'Astronomia.pl jest edukacyjnym portalem skierowanym do uczniów, studentów i miłośników astronomii. Przedstawiamy gwiazdy, planety, galaktyki, czarne dziury i wiele innych tajemnic Wszechświata.'
74 masthead_url = 'http://www.astronomia.pl/grafika/logo.gif'
75 cover_url = 'http://www.astronomia.pl/grafika/logo.gif'
76 category = 'astronomy, science'
77
78=== modified file 'recipes/bash_org_pl.recipe'
79--- recipes/bash_org_pl.recipe 2013-02-16 15:44:46 +0000
80+++ recipes/bash_org_pl.recipe 2013-03-06 19:41:20 +0000
81@@ -3,7 +3,7 @@
82 class Bash_org_pl(BasicNewsRecipe):
83 title = u'Bash.org.pl'
84 __author__ = 'fenuks'
85- description = 'Bash.org.pl - funny quotations from IRC discussions'
86+ description = 'Bash.org.pl - zabawne cytaty z IRC'
87 category = 'funny quotations, humour'
88 language = 'pl'
89 cover_url = u'http://userlogos.org/files/logos/dzikiosiol/none_0.png'
90@@ -51,4 +51,4 @@
91 feeds = []
92 feeds.append((u"Najnowsze", self.latest_articles()))
93 feeds.append((u"Losowe", self.random_articles()))
94- return feeds
95\ No newline at end of file
96+ return feeds
97
98=== modified file 'recipes/benchmark_pl.recipe'
99--- recipes/benchmark_pl.recipe 2012-10-17 14:12:08 +0000
100+++ recipes/benchmark_pl.recipe 2013-03-06 19:41:20 +0000
101@@ -3,14 +3,15 @@
102 class BenchmarkPl(BasicNewsRecipe):
103 title = u'Benchmark.pl'
104 __author__ = 'fenuks'
105- description = u'benchmark.pl -IT site'
106+ description = u'benchmark.pl, recenzje i testy sprzętu, aktualności, rankingi, sterowniki, porady, opinie'
107 masthead_url = 'http://www.benchmark.pl/i/logo-footer.png'
108- cover_url = 'http://www.ieaddons.pl/benchmark/logo_benchmark_new.gif'
109+ cover_url = 'http://www.benchmark.pl/i/logo-dark.png'
110 category = 'IT'
111 language = 'pl'
112 oldest_article = 8
113 max_articles_per_feed = 100
114- no_stylesheets=True
115+ no_stylesheets = True
116+ remove_attributes = ['style']
117 preprocess_regexps = [(re.compile(ur'<h3><span style="font-size: small;">&nbsp;Zobacz poprzednie <a href="http://www.benchmark.pl/news/zestawienie/grupa_id/135">Opinie dnia:</a></span>.*</body>', re.DOTALL|re.IGNORECASE), lambda match: '</body>'), (re.compile(ur'Więcej o .*?</ul>', re.DOTALL|re.IGNORECASE), lambda match: '')]
118 keep_only_tags=[dict(name='div', attrs={'class':['m_zwykly', 'gallery']}), dict(id='article')]
119 remove_tags_after=dict(name='div', attrs={'class':'body'})
120@@ -21,17 +22,18 @@
121
122
123 def append_page(self, soup, appendtag):
124- nexturl = soup.find('span', attrs={'class':'next'})
125- while nexturl is not None:
126- nexturl= self.INDEX + nexturl.parent['href']
127- soup2 = self.index_to_soup(nexturl)
128- nexturl=soup2.find('span', attrs={'class':'next'})
129+ nexturl = soup.find(attrs={'class':'next'})
130+ while nexturl:
131+ soup2 = self.index_to_soup(nexturl['href'])
132+ nexturl = soup2.find(attrs={'class':'next'})
133 pagetext = soup2.find(name='div', attrs={'class':'body'})
134 appendtag.find('div', attrs={'class':'k_ster'}).extract()
135 pos = len(appendtag.contents)
136 appendtag.insert(pos, pagetext)
137- if appendtag.find('div', attrs={'class':'k_ster'}) is not None:
138+ if appendtag.find('div', attrs={'class':'k_ster'}):
139 appendtag.find('div', attrs={'class':'k_ster'}).extract()
140+ for r in appendtag.findAll(attrs={'class':'changePage'}):
141+ r.extract()
142
143
144 def image_article(self, soup, appendtag):
145
146=== added file 'recipes/biweekly.recipe'
147--- recipes/biweekly.recipe 1970-01-01 00:00:00 +0000
148+++ recipes/biweekly.recipe 2013-03-06 19:41:20 +0000
149@@ -0,0 +1,51 @@
150+#!/usr/bin/env python
151+# -*- coding: utf-8 -*-
152+
153+__license__ = 'GPL v3'
154+__copyright__ = u'Łukasz Grąbczewski 2011'
155+__version__ = '2.0'
156+
157+import re, os
158+from calibre.ptempfile import PersistentTemporaryFile
159+from calibre.ebooks.conversion.cli import main
160+
161+class biweekly(BasicNewsRecipe):
162+ __author__ = u'Łukasz Grąbczewski'
163+ title = 'Biweekly'
164+ language = 'en_EN'
165+ publisher = 'National Audiovisual Institute'
166+ publication_type = 'magazine'
167+ description = u'link with culture [English edition of Polish magazine]: literature, theatre, film, art, music, views, talks'
168+
169+ conversion_options = {
170+ 'authors' : 'Biweekly.pl'
171+ ,'publisher' : publisher
172+ ,'language' : language
173+ ,'comments' : description
174+ ,'no_default_epub_cover' : True
175+ ,'preserve_cover_aspect_ratio': True
176+ }
177+
178+ def build_index(self):
179+ browser = self.get_browser()
180+ rc = browser.open('http://www.biweekly.pl/')
181+
182+ # find the link
183+ epublink = browser.find_link(text_regex=re.compile('ePUB VERSION'))
184+
185+ # download ebook
186+ self.report_progress(0,_('Downloading ePUB'))
187+ response = browser.follow_link(epublink)
188+ book_file = PersistentTemporaryFile(suffix='.epub')
189+ book_file.write(response.read())
190+ book_file.close()
191+
192+ # convert
193+ self.report_progress(0.2,_('Converting to OEB'))
194+ oebdir = self.output_dir + '/INPUT/'
195+ main(['ebook-convert', book_file.name, oebdir])
196+
197+ # feed calibre
198+ index = os.path.join(oebdir, 'content.opf')
199+
200+ return index
201
202=== modified file 'recipes/cd_action.recipe'
203--- recipes/cd_action.recipe 2012-04-18 04:03:44 +0000
204+++ recipes/cd_action.recipe 2013-03-06 19:41:20 +0000
205@@ -3,7 +3,7 @@
206 class CD_Action(BasicNewsRecipe):
207 title = u'CD-Action'
208 __author__ = 'fenuks'
209- description = 'cdaction.pl - polish games magazine site'
210+ description = 'Strona CD-Action (CDA), największego w Polsce pisma dla graczy.Pełne wersje gier, newsy, recenzje, zapowiedzi, konkursy, forum, opinie, galerie screenów,trailery, filmiki, patche, teksty. Gry komputerowe (PC) oraz na konsole (PS3, XBOX 360).'
211 category = 'games'
212 language = 'pl'
213 index='http://www.cdaction.pl'
214@@ -24,4 +24,4 @@
215 for a in soup('a'):
216 if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
217 a['href']=self.index + a['href']
218- return soup
219\ No newline at end of file
220+ return soup
221
222=== modified file 'recipes/computerworld_pl.recipe'
223--- recipes/computerworld_pl.recipe 2012-02-20 04:34:40 +0000
224+++ recipes/computerworld_pl.recipe 2013-03-06 19:41:20 +0000
225@@ -7,17 +7,13 @@
226 description = u'Serwis o IT w przemyśle, finansach, handlu, administracji oraz rynku IT i telekomunikacyjnym - wiadomości, opinie, analizy, porady prawne'
227 category = 'IT'
228 language = 'pl'
229- masthead_url= 'http://g1.computerworld.pl/cw/beta_gfx/cw2.gif'
230- no_stylesheets=True
231+ masthead_url = 'http://g1.computerworld.pl/cw/beta_gfx/cw2.gif'
232+ cover_url = 'http://g1.computerworld.pl/cw/beta_gfx/cw2.gif'
233+ no_stylesheets = True
234 oldest_article = 7
235 max_articles_per_feed = 100
236- keep_only_tags=[dict(attrs={'class':['tyt_news', 'prawo', 'autor', 'tresc']})]
237- remove_tags_after=dict(name='div', attrs={'class':'rMobi'})
238- remove_tags=[dict(name='div', attrs={'class':['nnav', 'rMobi']}), dict(name='table', attrs={'class':'ramka_slx'})]
239+ keep_only_tags = [dict(attrs={'class':['tyt_news', 'prawo', 'autor', 'tresc']})]
240+ remove_tags_after = dict(name='div', attrs={'class':'rMobi'})
241+ remove_tags = [dict(name='div', attrs={'class':['nnav', 'rMobi']}), dict(name='table', attrs={'class':'ramka_slx'})]
242 feeds = [(u'Wiadomo\u015bci', u'http://rssout.idg.pl/cw/news_iso.xml')]
243
244- def get_cover_url(self):
245- soup = self.index_to_soup('http://www.computerworld.pl/')
246- cover=soup.find(name='img', attrs={'class':'prawo'})
247- self.cover_url=cover['src']
248- return getattr(self, 'cover_url', self.cover_url)
249
250=== modified file 'recipes/conowego_pl.recipe'
251--- recipes/conowego_pl.recipe 2013-01-20 08:18:34 +0000
252+++ recipes/conowego_pl.recipe 2013-03-06 19:41:20 +0000
253@@ -4,11 +4,12 @@
254 title = u'conowego.pl'
255 __author__ = 'fenuks'
256 description = u'Nowy wortal technologiczny oraz gazeta internetowa. Testy najnowszych produktów, fachowe porady i recenzje. U nas znajdziesz wszystko o elektronice użytkowej !'
257- cover_url = 'http://www.conowego.pl/fileadmin/templates/main/images/logo_top.png'
258+ #cover_url = 'http://www.conowego.pl/fileadmin/templates/main/images/logo_top.png'
259 category = 'IT, news'
260 language = 'pl'
261 oldest_article = 7
262 max_articles_per_feed = 100
263+ INDEX = 'http://www.conowego.pl/'
264 no_stylesheets = True
265 remove_empty_feeds = True
266 use_embedded_content = False
267@@ -36,3 +37,10 @@
268
269 for r in appendtag.findAll(attrs={'class':['pages', 'paginationWrap']}):
270 r.extract()
271+
272+ def get_cover_url(self):
273+ soup = self.index_to_soup('http://www.conowego.pl/magazyn/')
274+ tag = soup.find(attrs={'class':'ms_left'})
275+ if tag:
276+ self.cover_url = self.INDEX + tag.find('img')['src']
277+ return getattr(self, 'cover_url', self.cover_url)
278
279=== modified file 'recipes/czas_gentlemanow.recipe'
280--- recipes/czas_gentlemanow.recipe 2012-12-14 12:01:34 +0000
281+++ recipes/czas_gentlemanow.recipe 2013-03-06 19:41:20 +0000
282@@ -1,4 +1,5 @@
283 # vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:fdm=marker:ai
284+import re
285 from calibre.web.feeds.news import BasicNewsRecipe
286
287 class CzasGentlemanow(BasicNewsRecipe):
288@@ -13,8 +14,9 @@
289 max_articles_per_feed = 100
290 no_stylesheets = True
291 remove_empty_feeds = True
292+ preprocess_regexps = [(re.compile(u'<h3>Może Cię też zainteresować:</h3>'), lambda m: '')]
293 use_embedded_content = False
294 keep_only_tags = [dict(name='div', attrs={'class':'content'})]
295- remove_tags = [dict(attrs={'class':'meta_comments'})]
296- remove_tags_after = dict(name='div', attrs={'class':'fblikebutton_button'})
297+ remove_tags = [dict(attrs={'class':'meta_comments'}), dict(id=['comments', 'related_posts_thumbnails'])]
298+ remove_tags_after = dict(id='comments')
299 feeds = [(u'M\u0119ski \u015awiat', u'http://czasgentlemanow.pl/category/meski-swiat/feed/'), (u'Styl', u'http://czasgentlemanow.pl/category/styl/feed/'), (u'Vademecum Gentlemana', u'http://czasgentlemanow.pl/category/vademecum/feed/'), (u'Dom i rodzina', u'http://czasgentlemanow.pl/category/dom-i-rodzina/feed/'), (u'Honor', u'http://czasgentlemanow.pl/category/honor/feed/'), (u'Gad\u017cety Gentlemana', u'http://czasgentlemanow.pl/category/gadzety-gentlemana/feed/')]
300
301=== modified file 'recipes/dobreprogamy.recipe'
302--- recipes/dobreprogamy.recipe 2012-11-10 10:51:21 +0000
303+++ recipes/dobreprogamy.recipe 2013-03-06 19:41:20 +0000
304@@ -18,7 +18,7 @@
305 max_articles_per_feed = 100
306 preprocess_regexps = [(re.compile(ur'<div id="\S+360pmp4">Twoja przeglądarka nie obsługuje Flasha i HTML5 lub wyłączono obsługę JavaScript...</div>'), lambda match: '') ]
307 keep_only_tags=[dict(attrs={'class':['news', 'entry single']})]
308- remove_tags = [dict(attrs={'class':['newsOptions', 'noPrint', 'komentarze', 'tags font-heading-master']}), dict(id='komentarze')]
309+ remove_tags = [dict(attrs={'class':['newsOptions', 'noPrint', 'komentarze', 'tags font-heading-master']}), dict(id='komentarze'), dict(name='iframe')]
310 #remove_tags = [dict(name='div', attrs={'class':['komentarze', 'block', 'portalInfo', 'menuBar', 'topBar']})]
311 feeds = [(u'Aktualności', 'http://feeds.feedburner.com/dobreprogramy/Aktualnosci'),
312 ('Blogi', 'http://feeds.feedburner.com/dobreprogramy/BlogCzytelnikow')]
313
314=== added file 'recipes/dwutygodnik.recipe'
315--- recipes/dwutygodnik.recipe 1970-01-01 00:00:00 +0000
316+++ recipes/dwutygodnik.recipe 2013-03-06 19:41:20 +0000
317@@ -0,0 +1,51 @@
318+#!/usr/bin/env python
319+# -*- coding: utf-8 -*-
320+
321+__license__ = 'GPL v3'
322+__copyright__ = u'Łukasz Grąbczewski 2011'
323+__version__ = '2.0'
324+
325+import re, os
326+from calibre.ptempfile import PersistentTemporaryFile
327+from calibre.ebooks.conversion.cli import main
328+
329+class dwutygodnik(BasicNewsRecipe):
330+ __author__ = u'Łukasz Grąbczewski'
331+ title = 'Dwutygodnik'
332+ language = 'pl_PL'
333+ publisher = 'Narodowy Instytut Audiowizualny'
334+ publication_type = 'magazine'
335+ description = u'Strona Kultury: literatura, teatr, film, sztuka, muzyka, felietony, rozmowy'
336+
337+ conversion_options = {
338+ 'authors' : 'Dwutygodnik.com'
339+ ,'publisher' : publisher
340+ ,'language' : language
341+ ,'comments' : description
342+ ,'no_default_epub_cover' : True
343+ ,'preserve_cover_aspect_ratio': True
344+ }
345+
346+ def build_index(self):
347+ browser = self.get_browser()
348+ rc = browser.open('http://www.dwutygodnik.com/')
349+
350+ # find the link
351+ epublink = browser.find_link(text_regex=re.compile('Wersja ePub'))
352+
353+ # download ebook
354+ self.report_progress(0,_('Downloading ePUB'))
355+ response = browser.follow_link(epublink)
356+ book_file = PersistentTemporaryFile(suffix='.epub')
357+ book_file.write(response.read())
358+ book_file.close()
359+
360+ # convert
361+ self.report_progress(0.2,_('Converting to OEB'))
362+ oebdir = self.output_dir + '/INPUT/'
363+ main(['ebook-convert', book_file.name, oebdir])
364+
365+ # feed calibre
366+ index = os.path.join(oebdir, 'content.opf')
367+
368+ return index
369
370=== modified file 'recipes/dzieje_pl.recipe'
371--- recipes/dzieje_pl.recipe 2012-12-14 09:32:01 +0000
372+++ recipes/dzieje_pl.recipe 2013-03-06 19:41:20 +0000
373@@ -3,7 +3,7 @@
374 class Dzieje(BasicNewsRecipe):
375 title = u'dzieje.pl'
376 __author__ = 'fenuks'
377- description = 'Dzieje - history of Poland'
378+ description = 'Dzieje.pl - najlepszy portal informacyjno-edukacyjny dotyczący historii Polski XX wieku. Archiwalne fotografie, filmy, katalog postaci, quizy i konkursy.'
379 cover_url = 'http://www.dzieje.pl/sites/default/files/dzieje_logo.png'
380 category = 'history'
381 language = 'pl'
382@@ -67,4 +67,4 @@
383 if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
384 a['href']=self.index + a['href']
385 self.append_page(soup, soup.body)
386- return soup
387\ No newline at end of file
388+ return soup
389
390=== added file 'recipes/dziennik_baltycki.recipe'
391--- recipes/dziennik_baltycki.recipe 1970-01-01 00:00:00 +0000
392+++ recipes/dziennik_baltycki.recipe 2013-03-06 19:41:20 +0000
393@@ -0,0 +1,34 @@
394+from calibre.web.feeds.news import BasicNewsRecipe
395+
396+class DziennikBaltycki(BasicNewsRecipe):
397+ title = u'Dziennik Ba\u0142tycki'
398+ __author__ = 'fenuks'
399+ description = u'Gazeta Regionalna Dziennik Bałtycki. Najnowsze Wiadomości Trójmiasto i Wiadomości Pomorskie. Czytaj!'
400+ category = 'newspaper'
401+ language = 'pl'
402+ encoding = 'iso-8859-2'
403+ masthead_url = 'http://s.polskatimes.pl/g/logo_naglowek/dziennikbaltycki.png?24'
404+ oldest_article = 7
405+ max_articles_per_feed = 100
406+ remove_empty_feeds= True
407+ no_stylesheets = True
408+ use_embedded_content = False
409+ ignore_duplicate_articles = {'title', 'url'}
410+ #preprocess_regexps = [(re.compile(ur'<b>Czytaj także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur',<b>Czytaj też:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>Zobacz także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<center><h4><a.*?</a></h4></center>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TEŻ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ WIĘCEJ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TAKŻE:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>\* CZYTAJ KONIECZNIE:.*', re.DOTALL), lambda match: '</body>'), (re.compile(ur'<b>Nasze serwisy:</b>.*', re.DOTALL), lambda match: '</body>') ]
411+ remove_tags_after= dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})
412+ remove_tags=[dict(id='mat-podobne'), dict(name='a', attrs={'class':'czytajDalej'}), dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})]
413+
414+ feeds = [(u'Wiadomo\u015bci', u'http://www.dziennikbaltycki.pl/rss/dziennikbaltycki_wiadomosci.xml?201302'), (u'Sport', u'http://dziennikbaltycki.feedsportal.com/c/32980/f/533756/index.rss?201302'), (u'Rejsy', u'http://www.dziennikbaltycki.pl/rss/dziennikbaltycki_rejsy.xml?201302'), (u'Biznes na Pomorzu', u'http://www.dziennikbaltycki.pl/rss/dziennikbaltycki_biznesnapomorzu.xml?201302'), (u'GOM', u'http://www.dziennikbaltycki.pl/rss/dziennikbaltycki_gom.xml?201302'), (u'Opinie', u'http://www.dziennikbaltycki.pl/rss/dziennikbaltycki_opinie.xml?201302'), (u'Pitawal Pomorski', u'http://www.dziennikbaltycki.pl/rss/dziennikbaltycki_pitawalpomorski.xml?201302')]
415+
416+ def print_version(self, url):
417+ return url.replace('artykul', 'drukuj')
418+
419+ def skip_ad_pages(self, soup):
420+ if 'Advertisement' in soup.title:
421+ nexturl=soup.find('a')['href']
422+ return self.index_to_soup(nexturl, raw=True)
423+
424+ def get_cover_url(self):
425+ soup = self.index_to_soup('http://www.prasa24.pl/gazeta/dziennik-baltycki/')
426+ self.cover_url=soup.find(id='pojemnik').img['src']
427+ return getattr(self, 'cover_url', self.cover_url)
428\ No newline at end of file
429
430=== added file 'recipes/dziennik_lodzki.recipe'
431--- recipes/dziennik_lodzki.recipe 1970-01-01 00:00:00 +0000
432+++ recipes/dziennik_lodzki.recipe 2013-03-06 19:41:20 +0000
433@@ -0,0 +1,35 @@
434+from calibre.web.feeds.news import BasicNewsRecipe
435+
436+class DziennikLodzki(BasicNewsRecipe):
437+ title = u'Dziennik \u0141\xf3dzki'
438+ __author__ = 'fenuks'
439+ description = u'Gazeta Regionalna Dziennik Łódzki. Najnowsze Wiadomości Łódź. Czytaj Wiadomości Łódzkie!'
440+ category = 'newspaper'
441+ language = 'pl'
442+ encoding = 'iso-8859-2'
443+ masthead_url = 'http://s.polskatimes.pl/g/logo_naglowek/dzienniklodzki.png?24'
444+ oldest_article = 7
445+ max_articles_per_feed = 100
446+ remove_empty_feeds = True
447+ no_stylesheets = True
448+ use_embedded_content = False
449+ ignore_duplicate_articles = {'title', 'url'}
450+ #preprocess_regexps = [(re.compile(ur'<b>Czytaj także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur',<b>Czytaj też:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>Zobacz także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<center><h4><a.*?</a></h4></center>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TEŻ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ WIĘCEJ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TAKŻE:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>\* CZYTAJ KONIECZNIE:.*', re.DOTALL), lambda match: '</body>'), (re.compile(ur'<b>Nasze serwisy:</b>.*', re.DOTALL), lambda match: '</body>') ]
451+ remove_tags_after= dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})
452+ remove_tags=[dict(id='mat-podobne'), dict(name='a', attrs={'class':'czytajDalej'}), dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})]
453+
454+ feeds = [(u'Na sygnale', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_nasygnale.xml?201302'), (u'\u0141\xf3d\u017a', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_lodz.xml?201302'), (u'Opinie', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_opinie.xml?201302'), (u'Pieni\u0105dze', u'http://dzienniklodzki.feedsportal.com/c/32980/f/533763/index.rss?201302'), (u'Kultura', u'http://dzienniklodzki.feedsportal.com/c/32980/f/533762/index.rss?201302'), (u'Sport', u'http://dzienniklodzki.feedsportal.com/c/32980/f/533761/index.rss?201302'), (u'Akcje', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_akcje.xml?201302'), (u'M\xf3j Reporter', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_mojreporter.xml?201302'), (u'Studni\xf3wki', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_studniowki.xml?201302'), (u'Kraj', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_kraj.xml?201302'), (u'Zdrowie', u'http://www.dzienniklodzki.pl/rss/dzienniklodzki_zdrowie.xml?201302')]
455+
456+
457+ def print_version(self, url):
458+ return url.replace('artykul', 'drukuj')
459+
460+ def skip_ad_pages(self, soup):
461+ if 'Advertisement' in soup.title:
462+ nexturl=soup.find('a')['href']
463+ return self.index_to_soup(nexturl, raw=True)
464+
465+ def get_cover_url(self):
466+ soup = self.index_to_soup('http://www.prasa24.pl/gazeta/dziennik-lodzki/')
467+ self.cover_url=soup.find(id='pojemnik').img['src']
468+ return getattr(self, 'cover_url', self.cover_url)
469
470=== added file 'recipes/dziennik_wschodni.recipe'
471--- recipes/dziennik_wschodni.recipe 1970-01-01 00:00:00 +0000
472+++ recipes/dziennik_wschodni.recipe 2013-03-06 19:41:20 +0000
473@@ -0,0 +1,78 @@
474+import re
475+from calibre.web.feeds.news import BasicNewsRecipe
476+class DziennikWschodni(BasicNewsRecipe):
477+ title = u'Dziennik Wschodni'
478+ __author__ = 'fenuks'
479+ description = u'Dziennik Wschodni - portal regionalny województwa lubelskiego.'
480+ category = 'newspaper'
481+ language = 'pl'
482+ encoding = 'iso-8859-2'
483+ extra_css = 'ul {list-style: none; padding:0; margin:0;}'
484+ INDEX = 'http://www.dziennikwschodni.pl'
485+ masthead_url = INDEX + '/images/top_logo.png'
486+ oldest_article = 7
487+ max_articles_per_feed = 100
488+ remove_empty_feeds = True
489+ no_stylesheets = True
490+ ignore_duplicate_articles = {'title', 'url'}
491+
492+ preprocess_regexps = [(re.compile(ur'Czytaj:.*?</a>', re.DOTALL), lambda match: ''), (re.compile(ur'Przeczytaj także:.*?</a>', re.DOTALL|re.IGNORECASE), lambda match: ''),
493+ (re.compile(ur'Przeczytaj również:.*?</a>', re.DOTALL|re.IGNORECASE), lambda match: ''), (re.compile(ur'Zobacz też:.*?</a>', re.DOTALL|re.IGNORECASE), lambda match: '')]
494+
495+ keep_only_tags = [dict(id=['article', 'cover', 'photostory'])]
496+ remove_tags = [dict(id=['articleTags', 'articleMeta', 'boxReadIt', 'articleGalleries', 'articleConnections',
497+ 'ForumArticleComments', 'articleRecommend', 'jedynkiLinks', 'articleGalleryConnections',
498+ 'photostoryConnections', 'articleEpaper', 'articlePoll', 'articleAlarm', 'articleByline']),
499+ dict(attrs={'class':'articleFunctions'})]
500+
501+
502+ feeds = [(u'Wszystkie', u'http://www.dziennikwschodni.pl/rss.xml'),
503+ (u'Lublin', u'http://www.dziennikwschodni.pl/lublin.xml'),
504+ (u'Zamość', u'http://www.dziennikwschodni.pl/zamosc.xml'),
505+ (u'Biała Podlaska', u'http://www.dziennikwschodni.pl/biala_podlaska.xml'),
506+ (u'Chełm', u'http://www.dziennikwschodni.pl/chelm.xml'),
507+ (u'Kraśnik', u'http://www.dziennikwschodni.pl/krasnik.xml'),
508+ (u'Puławy', u'http://www.dziennikwschodni.pl/pulawy.xml'),
509+ (u'Świdnik', u'http://www.dziennikwschodni.pl/swidnik.xml'),
510+ (u'Łęczna', u'http://www.dziennikwschodni.pl/leczna.xml'),
511+ (u'Lubartów', u'http://www.dziennikwschodni.pl/lubartow.xml'),
512+ (u'Sport', u'http://www.dziennikwschodni.pl/sport.xml'),
513+ (u'Praca', u'http://www.dziennikwschodni.pl/praca.xml'),
514+ (u'Dom', u'http://www.dziennikwschodni.pl/dom.xml'),
515+ (u'Moto', u'http://www.dziennikwschodni.pl/moto.xml'),
516+ (u'Zdrowie', u'http://www.dziennikwschodni.pl/zdrowie.xml'),
517+ ]
518+
519+ def get_cover_url(self):
520+ soup = self.index_to_soup(self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI')
521+ nexturl = self.INDEX + soup.find(id='covers').find('a')['href']
522+ soup = self.index_to_soup(nexturl)
523+ self.cover_url = self.INDEX + soup.find(id='cover').find(name='img')['src']
524+ return getattr(self, 'cover_url', self.cover_url)
525+
526+ def append_page(self, soup, appendtag):
527+ tag = soup.find('span', attrs={'class':'photoNavigationPages'})
528+ if tag:
529+ number = int(tag.string.rpartition('/')[-1].replace('&nbsp;', ''))
530+ baseurl = self.INDEX + soup.find(attrs={'class':'photoNavigationNext'})['href'][:-1]
531+
532+ for r in appendtag.findAll(attrs={'class':'photoNavigation'}):
533+ r.extract()
534+ for nr in range(2, number+1):
535+ soup2 = self.index_to_soup(baseurl + str(nr))
536+ pagetext = soup2.find(id='photoContainer')
537+ if pagetext:
538+ pos = len(appendtag.contents)
539+ appendtag.insert(pos, pagetext)
540+ pagetext = soup2.find(attrs={'class':'photoMeta'})
541+ if pagetext:
542+ pos = len(appendtag.contents)
543+ appendtag.insert(pos, pagetext)
544+ pagetext = soup2.find(attrs={'class':'photoStoryText'})
545+ if pagetext:
546+ pos = len(appendtag.contents)
547+ appendtag.insert(pos, pagetext)
548+
549+ def preprocess_html(self, soup):
550+ self.append_page(soup, soup.body)
551+ return soup
552
553=== added file 'recipes/dziennik_zachodni.recipe'
554--- recipes/dziennik_zachodni.recipe 1970-01-01 00:00:00 +0000
555+++ recipes/dziennik_zachodni.recipe 2013-03-06 19:41:20 +0000
556@@ -0,0 +1,34 @@
557+from calibre.web.feeds.news import BasicNewsRecipe
558+
class DziennikZachodni(BasicNewsRecipe):
    """News recipe for the Polish regional daily Dziennik Zachodni.

    Articles are fetched in their print version; the cover image is scraped
    from prasa24.pl.
    """

    title = u'Dziennik Zachodni'
    __author__ = 'fenuks'
    description = u'Gazeta Regionalna Dziennik Zachodni. Najnowsze Wiadomości Śląskie. Wiadomości Śląsk. Czytaj!'
    category = 'newspaper'
    language = 'pl'
    encoding = 'iso-8859-2'
    masthead_url = 'http://s.polskatimes.pl/g/logo_naglowek/dziennikzachodni.png?24'
    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True
    no_stylesheets = True
    use_embedded_content = False
    ignore_duplicate_articles = {'title', 'url'}
    remove_tags_after = dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})
    remove_tags = [
        dict(id='mat-podobne'),
        dict(name='a', attrs={'class':'czytajDalej'}),
        dict(attrs={'src':'http://nm.dz.com.pl/dz.png'}),
        dict(attrs={'href':'http://www.dziennikzachodni.pl/piano'}),
    ]

    feeds = [
        (u'Wszystkie', u'http://dziennikzachodni.feedsportal.com/c/32980/f/533764/index.rss?201302'),
        (u'Wiadomo\u015bci', u'http://dziennikzachodni.feedsportal.com/c/32980/f/533765/index.rss?201302'),
        (u'Regiony', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_regiony.xml?201302'),
        # NOTE(review): this URL is identical to the 'Regiony' feed above, so
        # with ignore_duplicate_articles this section presumably ends up
        # empty. Looks like a copy/paste slip -- confirm the real feed URL.
        (u'Opinie', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_regiony.xml?201302'),
        (u'Blogi', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_blogi.xml?201302'),
        (u'Serwisy', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_serwisy.xml?201302'),
        (u'Sport', u'http://dziennikzachodni.feedsportal.com/c/32980/f/533766/index.rss?201302'),
        (u'M\xf3j Reporter', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_mojreporter.xml?201302'),
        (u'Na narty', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_nanarty.xml?201302'),
        (u'Drogi', u'http://www.dziennikzachodni.pl/rss/dziennikzachodni_drogi.xml?201302'),
        (u'Pieni\u0105dze', u'http://dziennikzachodni.feedsportal.com/c/32980/f/533768/index.rss?201302'),
    ]

    def print_version(self, url):
        """Return ``url`` with every 'artykul' replaced by 'drukuj'."""
        return url.replace('artykul', 'drukuj')

    def skip_ad_pages(self, soup):
        """Follow the first link of an 'Advertisement' page.

        :param soup: parsed page that was just downloaded.
        :return: ``self.index_to_soup(href, raw=True)`` for the first ``<a>``
            on the page when the title test matches; ``None`` otherwise, and
            also when the page has no ``<title>`` or no usable link (the
            original code raised ``TypeError`` in those cases).
        """
        page_title = soup.title
        # The membership test itself is kept exactly as originally written.
        if page_title is None or 'Advertisement' not in page_title:
            return None
        link = soup.find('a')
        nexturl = link.get('href') if link is not None else None
        if not nexturl:
            return None
        return self.index_to_soup(nexturl, raw=True)

    def get_cover_url(self):
        """Scrape the cover image URL, store it on ``self.cover_url`` and return it.

        The URL is the ``src`` of the first ``<img>`` inside the element with
        id ``pojemnik`` on the prasa24.pl page. Raises if that markup is
        missing.
        """
        soup = self.index_to_soup('http://www.prasa24.pl/gazeta/dziennik-zachodni/')
        self.cover_url = soup.find(id='pojemnik').img['src']
        return self.cover_url
591
592=== added file 'recipes/echo_dnia.recipe'
593--- recipes/echo_dnia.recipe 1970-01-01 00:00:00 +0000
594+++ recipes/echo_dnia.recipe 2013-03-06 19:41:20 +0000
595@@ -0,0 +1,74 @@
596+import re
597+from calibre.web.feeds.news import BasicNewsRecipe
598+
class EchoDnia(BasicNewsRecipe):
    """News recipe for the Polish regional portal Echo Dnia (echodnia.eu).

    Multi-page photo stories are merged into a single article by
    ``append_page``; the cover is scraped from the site's 'JEDYNKI' section.
    """

    title = u'Echo Dnia'
    __author__ = 'fenuks'
    description = u'Echo Dnia - portal regionalny świętokrzyskiego radomskiego i podkarpackiego. Najnowsze wiadomości z Twojego regionu, galerie, video, mp3.'
    category = 'newspaper'
    language = 'pl'
    encoding = 'iso-8859-2'
    extra_css = 'ul {list-style: none; padding:0; margin:0;}'
    INDEX = 'http://www.echodnia.eu'
    masthead_url = INDEX + '/images/top_logo.png'
    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True
    no_stylesheets = True
    ignore_duplicate_articles = {'title', 'url'}

    # Strip inline "read also" teasers up to the end of their link. The
    # patterns contain no backslashes, so plain u'' literals are equivalent
    # to the former ur'' ones and also parse on Python 3.
    preprocess_regexps = [
        (re.compile(u'Czytaj:.*?</a>', re.DOTALL), lambda match: ''),
        (re.compile(u'Przeczytaj także:.*?</a>', re.DOTALL|re.IGNORECASE), lambda match: ''),
        (re.compile(u'Przeczytaj również:.*?</a>', re.DOTALL|re.IGNORECASE), lambda match: ''),
        (re.compile(u'Zobacz też:.*?</a>', re.DOTALL|re.IGNORECASE), lambda match: ''),
    ]

    keep_only_tags = [dict(id=['article', 'cover', 'photostory'])]
    remove_tags = [
        dict(id=['articleTags', 'articleMeta', 'boxReadIt', 'articleGalleries', 'articleConnections',
                 'ForumArticleComments', 'articleRecommend', 'jedynkiLinks', 'articleGalleryConnections',
                 'photostoryConnections', 'articleEpaper', 'articlePoll', 'articleAlarm', 'articleByline']),
        dict(attrs={'class':'articleFunctions'}),
    ]

    feeds = [
        (u'Wszystkie', u'http://www.echodnia.eu/rss.xml'),
        (u'Świętokrzyskie', u'http://www.echodnia.eu/swietokrzyskie.xml'),
        (u'Radomskie', u'http://www.echodnia.eu/radomskie.xml'),
        (u'Podkarpackie', u'http://www.echodnia.eu/podkarpackie.xml'),
        (u'Sport \u015bwi\u0119tokrzyski', u'http://www.echodnia.eu/sport_swi.xml'),
        (u'Sport radomski', u'http://www.echodnia.eu/sport_rad.xml'),
        (u'Sport podkarpacki', u'http://www.echodnia.eu/sport_pod.xml'),
        (u'Pi\u0142ka no\u017cna', u'http://www.echodnia.eu/pilka.xml'),
        (u'Praca', u'http://www.echodnia.eu/praca.xml'),
        (u'Dom', u'http://www.echodnia.eu/dom.xml'),
        (u'Auto', u'http://www.echodnia.eu/auto.xml'),
        (u'Zdrowie', u'http://www.echodnia.eu/zdrowie.xml'),
    ]

    def get_cover_url(self):
        """Scrape the cover image URL, store it on ``self.cover_url`` and return it.

        Follows the first link inside the element with id ``covers`` on the
        'JEDYNKI' section page, then takes the ``src`` of the ``<img>`` inside
        the element with id ``cover`` on the linked page. Both values are
        prefixed with ``INDEX``. Raises if the expected markup is missing.
        """
        index_soup = self.index_to_soup(self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI')
        nexturl = self.INDEX + index_soup.find(id='covers').find('a')['href']
        issue_soup = self.index_to_soup(nexturl)
        self.cover_url = self.INDEX + issue_soup.find(id='cover').find(name='img')['src']
        return self.cover_url

    def append_page(self, soup, appendtag):
        """Append the remaining pages of a multi-page photo story.

        When ``soup`` contains a ``span`` with class ``photoNavigationPages``,
        the page count is read from the text after its last ``/``, the
        ``photoNavigation`` elements are removed from ``appendtag``, and for
        each page from 2 up to that count the ``photoContainer``,
        ``photoMeta`` and ``photoStoryText`` elements that exist are inserted
        at the end of ``appendtag``.

        Nothing is changed when there is no pager, or when the pager text or
        the ``photoNavigationNext`` link cannot be used (the original code
        raised in that case).

        :param soup: parsed first page of the article.
        :param appendtag: tag that receives the extra pages' content.
        """
        tag = soup.find('span', attrs={'class':'photoNavigationPages'})
        if not tag:
            return
        next_link = soup.find(attrs={'class':'photoNavigationNext'})
        try:
            number = int(tag.string.rpartition('/')[-1].replace('&nbsp;', ''))
            # Drop the last character of the "next" href; the page number is
            # appended in its place below.
            baseurl = self.INDEX + next_link['href'][:-1]
        except (AttributeError, KeyError, TypeError, ValueError):
            # Pager markup is not in the expected shape: keep page one only.
            return

        for nav in appendtag.findAll(attrs={'class':'photoNavigation'}):
            nav.extract()

        # Looked up in this order on every page, exactly as before.
        wanted = (
            {'id': 'photoContainer'},
            {'attrs': {'class':'photoMeta'}},
            {'attrs': {'class':'photoStoryText'}},
        )
        for nr in range(2, number + 1):
            soup2 = self.index_to_soup(baseurl + str(nr))
            for query in wanted:
                pagetext = soup2.find(**query)
                if pagetext:
                    appendtag.insert(len(appendtag.contents), pagetext)

    def preprocess_html(self, soup):
        """Run ``append_page`` on ``soup.body`` and return the same soup."""
        self.append_page(soup, soup.body)
        return soup
670
671=== modified file 'recipes/eioba.recipe'
672--- recipes/eioba.recipe 2012-04-18 04:03:44 +0000
673+++ recipes/eioba.recipe 2013-03-06 19:41:20 +0000
674@@ -4,6 +4,7 @@
675 class eioba(BasicNewsRecipe):
676 title = u'eioba'
677 __author__ = 'fenuks'
678+ description = u'eioba.pl - daj się przeczytać!'
679 cover_url = 'http://www.eioba.org/lay/logo_pl_v3.png'
680 language = 'pl'
681 oldest_article = 7
682
683=== modified file 'recipes/elektroda_pl.recipe'
684--- recipes/elektroda_pl.recipe 2012-06-07 06:45:56 +0000
685+++ recipes/elektroda_pl.recipe 2013-03-06 19:41:20 +0000
686@@ -5,7 +5,7 @@
687 title = u'Elektroda'
688 oldest_article = 8
689 __author__ = 'fenuks'
690- description = 'Elektroda.pl'
691+ description = 'Międzynarodowy portal elektroniczny udostępniający bogate zasoby z dziedziny elektroniki oraz forum dyskusyjne.'
692 cover_url = 'http://demotywatory.elektroda.pl/Thunderpic/logo.gif'
693 category = 'electronics'
694 language = 'pl'
695
696=== modified file 'recipes/emuzica_pl.recipe'
697--- recipes/emuzica_pl.recipe 2012-04-18 04:03:44 +0000
698+++ recipes/emuzica_pl.recipe 2013-03-06 19:41:20 +0000
699@@ -12,6 +12,7 @@
700 no_stylesheets = True
701 oldest_article = 7
702 max_articles_per_feed = 100
703+ remove_attributes = ['style']
704 keep_only_tags=[dict(name='div', attrs={'id':'news_container'}), dict(name='h3'), dict(name='div', attrs={'class':'review_text'})]
705 remove_tags=[dict(name='span', attrs={'id':'date'})]
706 feeds = [(u'Aktualno\u015bci', u'http://www.emuzyka.pl/rss.php?f=1'), (u'Recenzje', u'http://www.emuzyka.pl/rss.php?f=2')]
707@@ -20,4 +21,4 @@
708 for a in soup('a'):
709 if a.has_key('href') and 'http://' not in a['href'] and 'https://' not in a['href']:
710 a['href']=self.index + a['href']
711- return soup
712\ No newline at end of file
713+ return soup
714
715=== modified file 'recipes/film_web.recipe'
716--- recipes/film_web.recipe 2012-12-14 09:32:01 +0000
717+++ recipes/film_web.recipe 2013-03-06 19:41:20 +0000
718@@ -4,21 +4,21 @@
719 class FilmWebPl(BasicNewsRecipe):
720 title = u'FilmWeb'
721 __author__ = 'fenuks'
722- description = 'FilmWeb - biggest polish movie site'
723- cover_url = 'http://userlogos.org/files/logos/crudus/filmweb.png'
724+ description = 'Filmweb.pl - Filmy takie jak Ty Filmweb to największy i najczęściej odwiedzany polski serwis filmowy. Największa baza filmów, seriali i aktorów, repertuar kin i tv, ...'
725+ cover_url = 'http://gfx.filmweb.pl/n/logo-filmweb-bevel.jpg'
726 category = 'movies'
727 language = 'pl'
728- index='http://www.filmweb.pl'
729+ index = 'http://www.filmweb.pl'
730 oldest_article = 8
731 max_articles_per_feed = 100
732- no_stylesheets= True
733- remove_empty_feeds=True
734+ no_stylesheets = True
735+ remove_empty_feeds = True
736 ignore_duplicate_articles = {'title', 'url'}
737 preprocess_regexps = [(re.compile(u'\(kliknij\,\ aby powiększyć\)', re.IGNORECASE), lambda m: ''), ]#(re.compile(ur' | ', re.IGNORECASE), lambda m: '')]
738 extra_css = '.hdrBig {font-size:22px;} ul {list-style-type:none; padding: 0; margin: 0;}'
739- remove_tags= [dict(name='div', attrs={'class':['recommendOthers']}), dict(name='ul', attrs={'class':'fontSizeSet'}), dict(attrs={'class':'userSurname anno'})]
740+ remove_tags = [dict(name='div', attrs={'class':['recommendOthers']}), dict(name='ul', attrs={'class':'fontSizeSet'}), dict(attrs={'class':'userSurname anno'})]
741 remove_attributes = ['style',]
742- keep_only_tags= [dict(name='h1', attrs={'class':['hdrBig', 'hdrEntity']}), dict(name='div', attrs={'class':['newsInfo', 'newsInfoSmall', 'reviewContent description']})]
743+ keep_only_tags = [dict(name='h1', attrs={'class':['hdrBig', 'hdrEntity']}), dict(name='div', attrs={'class':['newsInfo', 'newsInfoSmall', 'reviewContent description']})]
744 feeds = [(u'News / Filmy w produkcji', 'http://www.filmweb.pl/feed/news/category/filminproduction'),
745 (u'News / Festiwale, nagrody i przeglądy', u'http://www.filmweb.pl/feed/news/category/festival'),
746 (u'News / Seriale', u'http://www.filmweb.pl/feed/news/category/serials'),
747
748=== modified file 'recipes/focus_pl.recipe'
749--- recipes/focus_pl.recipe 2012-10-25 21:47:19 +0000
750+++ recipes/focus_pl.recipe 2013-03-06 19:41:20 +0000
751@@ -13,7 +13,7 @@
752 title = u'Focus'
753 publisher = u'Gruner + Jahr Polska'
754 category = u'News'
755- description = u'Newspaper'
756+ description = u'Focus.pl - pierwszy w Polsce portal społecznościowy dla miłośników nauki. Tematyka: nauka, historia, cywilizacja, technika, przyroda, sport, gadżety'
757 category = 'magazine'
758 cover_url = ''
759 remove_empty_feeds = True
760
761=== modified file 'recipes/fotoblogia_pl.recipe'
762--- recipes/fotoblogia_pl.recipe 2012-04-18 04:14:36 +0000
763+++ recipes/fotoblogia_pl.recipe 2013-03-06 19:41:20 +0000
764@@ -3,6 +3,7 @@
765 class Fotoblogia_pl(BasicNewsRecipe):
766 title = u'Fotoblogia.pl'
767 __author__ = 'fenuks'
768+ description = u'Jeden z największych polskich blogów o fotografii.'
769 category = 'photography'
770 language = 'pl'
771 masthead_url = 'http://img.interia.pl/komputery/nimg/u/0/fotoblogia21.jpg'
772@@ -11,6 +12,6 @@
773 max_articles_per_feed = 100
774 no_stylesheets = True
775 use_embedded_content = False
776- keep_only_tags=[dict(name='div', attrs={'class':'post-view post-standard'})]
777+ keep_only_tags=[dict(name='div', attrs={'class':['post-view post-standard', 'photo-container']})]
778 remove_tags=[dict(attrs={'class':['external fotoblogia', 'categories', 'tags']})]
779 feeds = [(u'Wszystko', u'http://fotoblogia.pl/feed/rss2')]
780
781=== added file 'recipes/gazeta_krakowska.recipe'
782--- recipes/gazeta_krakowska.recipe 1970-01-01 00:00:00 +0000
783+++ recipes/gazeta_krakowska.recipe 2013-03-06 19:41:20 +0000
784@@ -0,0 +1,34 @@
785+from calibre.web.feeds.news import BasicNewsRecipe
786+
class GazetaKrakowska(BasicNewsRecipe):
    """News recipe for the Polish regional daily Gazeta Krakowska.

    Articles are fetched in their print version; the cover image is scraped
    from prasa24.pl.
    """

    title = u'Gazeta Krakowska'
    __author__ = 'fenuks'
    description = u'Gazeta Regionalna Gazeta Krakowska. Najnowsze Wiadomości Kraków. Informacje Kraków. Czytaj!'
    category = 'newspaper'
    language = 'pl'
    encoding = 'iso-8859-2'
    masthead_url = 'http://s.polskatimes.pl/g/logo_naglowek/gazetakrakowska.png?24'
    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True
    no_stylesheets = True
    use_embedded_content = False
    ignore_duplicate_articles = {'title', 'url'}
    remove_tags_after = dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})
    remove_tags = [
        dict(id='mat-podobne'),
        dict(name='a', attrs={'class':'czytajDalej'}),
        dict(attrs={'src':'http://nm.dz.com.pl/dz.png'}),
    ]

    feeds = [
        (u'Fakty24', u'http://gazetakrakowska.feedsportal.com/c/32980/f/533770/index.rss?201302'),
        (u'Krak\xf3w', u'http://www.gazetakrakowska.pl/rss/gazetakrakowska_krakow.xml?201302'),
        (u'Tarn\xf3w', u'http://www.gazetakrakowska.pl/rss/gazetakrakowska_tarnow.xml?201302'),
        (u'Nowy S\u0105cz', u'http://www.gazetakrakowska.pl/rss/gazetakrakowska_nsacz.xml?201302'),
        (u'Ma\u0142. Zach.', u'http://www.gazetakrakowska.pl/rss/gazetakrakowska_malzach.xml?201302'),
        (u'Podhale', u'http://www.gazetakrakowska.pl/rss/gazetakrakowska_podhale.xml?201302'),
        (u'Sport', u'http://gazetakrakowska.feedsportal.com/c/32980/f/533771/index.rss?201302'),
        (u'Kultura', u'http://gazetakrakowska.feedsportal.com/c/32980/f/533772/index.rss?201302'),
        (u'Opinie', u'http://www.gazetakrakowska.pl/rss/gazetakrakowska_opinie.xml?201302'),
        (u'Magazyn', u'http://www.gazetakrakowska.pl/rss/gazetakrakowska_magazyn.xml?201302'),
    ]

    def print_version(self, url):
        """Return ``url`` with every 'artykul' replaced by 'drukuj'."""
        return url.replace('artykul', 'drukuj')

    def skip_ad_pages(self, soup):
        """Follow the first link of an 'Advertisement' page.

        :param soup: parsed page that was just downloaded.
        :return: ``self.index_to_soup(href, raw=True)`` for the first ``<a>``
            on the page when the title test matches; ``None`` otherwise, and
            also when the page has no ``<title>`` or no usable link (the
            original code raised ``TypeError`` in those cases).
        """
        page_title = soup.title
        # The membership test itself is kept exactly as originally written.
        if page_title is None or 'Advertisement' not in page_title:
            return None
        link = soup.find('a')
        nexturl = link.get('href') if link is not None else None
        if not nexturl:
            return None
        return self.index_to_soup(nexturl, raw=True)

    def get_cover_url(self):
        """Scrape the cover image URL, store it on ``self.cover_url`` and return it.

        The URL is the ``src`` of the first ``<img>`` inside the element with
        id ``pojemnik`` on the prasa24.pl page. Raises if that markup is
        missing.
        """
        soup = self.index_to_soup('http://www.prasa24.pl/gazeta/gazeta-krakowska/')
        self.cover_url = soup.find(id='pojemnik').img['src']
        return self.cover_url
819
820=== added file 'recipes/gazeta_lubuska.recipe'
821--- recipes/gazeta_lubuska.recipe 1970-01-01 00:00:00 +0000
822+++ recipes/gazeta_lubuska.recipe 2013-03-06 19:41:20 +0000
823@@ -0,0 +1,64 @@
824+import re
825+from calibre.web.feeds.news import BasicNewsRecipe
826+
class GazetaLubuska(BasicNewsRecipe):
    """News recipe for the Polish regional portal Gazeta Lubuska.

    Multi-page photo stories are merged into a single article by
    ``append_page``; the cover is scraped from the site's 'JEDYNKI' section.
    """

    title = u'Gazeta Lubuska'
    __author__ = 'fenuks'
    description = u'Gazeta Lubuska - portal regionalny województwa lubuskiego.'
    category = 'newspaper'
    language = 'pl'
    encoding = 'iso-8859-2'
    extra_css = 'ul {list-style: none; padding:0; margin:0;}'
    INDEX = 'http://www.gazetalubuska.pl'
    masthead_url = INDEX + '/images/top_logo.png'
    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True
    no_stylesheets = True
    ignore_duplicate_articles = {'title', 'url'}

    # Strip inline "read also" teasers up to the end of their link. The
    # patterns contain no backslashes, so plain u'' literals are equivalent
    # to the former ur'' ones and also parse on Python 3.
    preprocess_regexps = [
        (re.compile(u'Czytaj:.*?</a>', re.DOTALL), lambda match: ''),
        (re.compile(u'Przeczytaj także:.*?</a>', re.DOTALL|re.IGNORECASE), lambda match: ''),
        (re.compile(u'Przeczytaj również:.*?</a>', re.DOTALL|re.IGNORECASE), lambda match: ''),
        (re.compile(u'Zobacz też:.*?</a>', re.DOTALL|re.IGNORECASE), lambda match: ''),
    ]

    keep_only_tags = [dict(id=['article', 'cover', 'photostory'])]
    remove_tags = [
        dict(id=['articleTags', 'articleMeta', 'boxReadIt', 'articleGalleries', 'articleConnections',
                 'ForumArticleComments', 'articleRecommend', 'jedynkiLinks', 'articleGalleryConnections',
                 'photostoryConnections', 'articleEpaper', 'articlePoll', 'articleAlarm', 'articleByline']),
        dict(attrs={'class':'articleFunctions'}),
    ]

    feeds = [
        (u'Wszystkie', u'http://www.gazetalubuska.pl/rss.xml'),
        # Title corrected from the misspelt 'Dreznenko'; the feed URL already
        # used the right name.
        (u'Drezdenko', u'http://www.gazetalubuska.pl/drezdenko.xml'),
        (u'G\u0142og\xf3w', u'http://www.gazetalubuska.pl/glogow.xml'),
        (u'Gorz\xf3w Wielkopolski', u'http://www.gazetalubuska.pl/gorzow-wielkopolski.xml'),
        (u'Gubin', u'http://www.gazetalubuska.pl/gubin.xml'),
        (u'Kostrzyn', u'http://www.gazetalubuska.pl/kostrzyn.xml'),
        (u'Krosno Odrza\u0144skie', u'http://www.gazetalubuska.pl/krosno-odrzanskie.xml'),
        (u'Lubsko', u'http://www.gazetalubuska.pl/lubsko.xml'),
        (u'Mi\u0119dzych\xf3d', u'http://www.gazetalubuska.pl/miedzychod.xml'),
        (u'Mi\u0119dzyrzecz', u'http://www.gazetalubuska.pl/miedzyrzecz.xml'),
        (u'Nowa S\xf3l', u'http://www.gazetalubuska.pl/nowa-sol.xml'),
        (u'S\u0142ubice', u'http://www.gazetalubuska.pl/slubice.xml'),
        (u'Strzelce Kraje\u0144skie', u'http://www.gazetalubuska.pl/strzelce-krajenskie.xml'),
        (u'Sulech\xf3w', u'http://www.gazetalubuska.pl/sulechow.xml'),
        (u'Sul\u0119cin', u'http://www.gazetalubuska.pl/sulecin.xml'),
        (u'\u015awi\u0119bodzin', u'http://www.gazetalubuska.pl/swiebodzin.xml'),
        (u'Wolsztyn', u'http://www.gazetalubuska.pl/wolsztyn.xml'),
        (u'Wschowa', u'http://www.gazetalubuska.pl/wschowa.xml'),
        (u'Zielona G\xf3ra', u'http://www.gazetalubuska.pl/zielona-gora.xml'),
        (u'\u017baga\u0144', u'http://www.gazetalubuska.pl/zagan.xml'),
        (u'\u017bary', u'http://www.gazetalubuska.pl/zary.xml'),
        (u'Sport', u'http://www.gazetalubuska.pl/sport.xml'),
        (u'Auto', u'http://www.gazetalubuska.pl/auto.xml'),
        (u'Dom', u'http://www.gazetalubuska.pl/dom.xml'),
        (u'Praca', u'http://www.gazetalubuska.pl/praca.xml'),
        (u'Zdrowie', u'http://www.gazetalubuska.pl/zdrowie.xml'),
    ]

    def get_cover_url(self):
        """Scrape the cover image URL, store it on ``self.cover_url`` and return it.

        Follows the first link inside the element with id ``covers`` on the
        'JEDYNKI' section page, then takes the ``src`` of the ``<img>`` inside
        the element with id ``cover`` on the linked page. Both values are
        prefixed with ``INDEX``. Raises if the expected markup is missing.
        """
        index_soup = self.index_to_soup(self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI')
        nexturl = self.INDEX + index_soup.find(id='covers').find('a')['href']
        issue_soup = self.index_to_soup(nexturl)
        self.cover_url = self.INDEX + issue_soup.find(id='cover').find(name='img')['src']
        return self.cover_url

    def append_page(self, soup, appendtag):
        """Append the remaining pages of a multi-page photo story.

        When ``soup`` contains a ``span`` with class ``photoNavigationPages``,
        the page count is read from the text after its last ``/``, the
        ``photoNavigation`` elements are removed from ``appendtag``, and for
        each page from 2 up to that count the ``photoContainer``,
        ``photoMeta`` and ``photoStoryText`` elements that exist are inserted
        at the end of ``appendtag``.

        Nothing is changed when there is no pager, or when the pager text or
        the ``photoNavigationNext`` link cannot be used (the original code
        raised in that case).

        :param soup: parsed first page of the article.
        :param appendtag: tag that receives the extra pages' content.
        """
        tag = soup.find('span', attrs={'class':'photoNavigationPages'})
        if not tag:
            return
        next_link = soup.find(attrs={'class':'photoNavigationNext'})
        try:
            number = int(tag.string.rpartition('/')[-1].replace('&nbsp;', ''))
            # Drop the last character of the "next" href; the page number is
            # appended in its place below.
            baseurl = self.INDEX + next_link['href'][:-1]
        except (AttributeError, KeyError, TypeError, ValueError):
            # Pager markup is not in the expected shape: keep page one only.
            return

        for nav in appendtag.findAll(attrs={'class':'photoNavigation'}):
            nav.extract()

        # Looked up in this order on every page, exactly as before.
        wanted = (
            {'id': 'photoContainer'},
            {'attrs': {'class':'photoMeta'}},
            {'attrs': {'class':'photoStoryText'}},
        )
        for nr in range(2, number + 1):
            soup2 = self.index_to_soup(baseurl + str(nr))
            for query in wanted:
                pagetext = soup2.find(**query)
                if pagetext:
                    appendtag.insert(len(appendtag.contents), pagetext)

    def preprocess_html(self, soup):
        """Run ``append_page`` on ``soup.body`` and return the same soup."""
        self.append_page(soup, soup.body)
        return soup
888
889=== modified file 'recipes/gazeta_pomorska.recipe'
890--- recipes/gazeta_pomorska.recipe 2013-03-04 23:06:55 +0000
891+++ recipes/gazeta_pomorska.recipe 2013-03-06 19:41:20 +0000
892@@ -1,102 +1,91 @@
893-#!/usr/bin/env python
894-
895-# # Przed uzyciem przeczytaj komentarz w sekcji "feeds"
896-
897-__license__ = 'GPL v3'
898-__copyright__ = u'2010, Richard z forum.eksiazki.org'
899-'''pomorska.pl'''
900-
901 import re
902 from calibre.web.feeds.news import BasicNewsRecipe
903
904 class GazetaPomorska(BasicNewsRecipe):
905 title = u'Gazeta Pomorska'
906- publisher = u'Gazeta Pomorska'
907- description = u'Kujawy i Pomorze - wiadomo\u015bci'
908+ __author__ = 'Richard z forum.eksiazki.org, fenuks'
909+ description = u'Gazeta Pomorska - portal regionalny'
910+ category = 'newspaper'
911 language = 'pl'
912- __author__ = u'Richard z forum.eksiazki.org'
913- # # (dziekuje t3d z forum.eksiazki.org za testy)
914- oldest_article = 2
915- max_articles_per_feed = 20
916+ encoding = 'iso-8859-2'
917+ extra_css = 'ul {list-style: none; padding:0; margin:0;}'
918+ INDEX = 'http://www.pomorska.pl'
919+ masthead_url = INDEX + '/images/top_logo.png'
920+ oldest_article = 7
921+ max_articles_per_feed = 100
922+ remove_empty_feeds = True
923 no_stylesheets = True
924- remove_javascript = True
925- preprocess_regexps = [
926- (re.compile(r'<a href="http://maps.google[^>]*>[^<]*</a>\.*', re.DOTALL|re.IGNORECASE), lambda m: ''),
927- (re.compile(r'[<Bb >]*Poznaj opinie[^<]*[</Bb >]*[^<]*<a href[^>]*>[^<]*</a>\.*', re.DOTALL|re.IGNORECASE), lambda m: ''),
928- (re.compile(r'[<Bb >]*Przeczytaj[^<]*[</Bb >]*[^<]*<a href[^>]*>[^<]*</a>\.*', re.DOTALL|re.IGNORECASE), lambda m: ''),
929- (re.compile(r'[<Bb >]*Wi.cej informacji[^<]*[</Bb >]*[^<]*<a href[^>]*>[^<]*</a>\.*', re.DOTALL|re.IGNORECASE), lambda m: ''),
930- (re.compile(r'<a href[^>]*>[<Bb >]*Wideo[^<]*[</Bb >]*[^<]*</a>\.*', re.DOTALL|re.IGNORECASE), lambda m: ''),
931- (re.compile(r'<a href[^>]*>[<Bb >]*KLIKNIJ TUTAJ[^<]*[</Bb >]*[^<]*</a>\.*', re.DOTALL|re.IGNORECASE), lambda m: '')
932- ]
933-
934- feeds = [
935-# # Tutaj jest wymieniona lista kategorii jakie mozemy otrzymywac z Gazety
936-# # Pomorskiej, po jednej kategorii w wierszu. Jesli na poczatku danego wiersza
937-# # znajduje sie jeden znak "#", oznacza to ze kategoria jest zakomentowana
938-# # i nie bedziemy jej otrzymywac. Jesli chcemy ja otrzymywac nalezy usunac
939-# # znak # z jej wiersza.
940-# # Jesli subskrybujemy wiecej niz jedna kategorie, na koncu wiersza z kazda
941-# # kategoria musi sie znajdowac niezakomentowany przecinek, z wyjatkiem
942-# # ostatniego wiersza - ma byc bez przecinka na koncu.
943-# # Rekomendowane opcje wyboru kategorii:
944-# # 1. PomorskaRSS - wiadomosci kazdego typu, lub
945-# # 2. Region + wybrane miasta, lub
946-# # 3. Wiadomosci tematyczne.
947-# # Lista kategorii:
948-
949- # # PomorskaRSS - wiadomosci kazdego typu, zakomentuj znakiem "#"
950- # # przed odkomentowaniem wiadomosci wybranego typu:
951- (u'PomorskaRSS', u'http://www.pomorska.pl/rss.xml')
952-
953- # # wiadomosci z regionu nie przypisane do okreslonego miasta:
954- # (u'Region', u'http://www.pomorska.pl/region.xml'),
955-
956- # # wiadomosci przypisane do miast:
957- # (u'Bydgoszcz', u'http://www.pomorska.pl/bydgoszcz.xml'),
958- # (u'Nak\u0142o', u'http://www.pomorska.pl/naklo.xml'),
959- # (u'Koronowo', u'http://www.pomorska.pl/koronowo.xml'),
960- # (u'Solec Kujawski', u'http://www.pomorska.pl/soleckujawski.xml'),
961- # (u'Grudzi\u0105dz', u'http://www.pomorska.pl/grudziadz.xml'),
962- # (u'Inowroc\u0142aw', u'http://www.pomorska.pl/inowroclaw.xml'),
963- # (u'Toru\u0144', u'http://www.pomorska.pl/torun.xml'),
964- # (u'W\u0142oc\u0142awek', u'http://www.pomorska.pl/wloclawek.xml'),
965- # (u'Aleksandr\u00f3w Kujawski', u'http://www.pomorska.pl/aleksandrow.xml'),
966- # (u'Brodnica', u'http://www.pomorska.pl/brodnica.xml'),
967- # (u'Che\u0142mno', u'http://www.pomorska.pl/chelmno.xml'),
968- # (u'Chojnice', u'http://www.pomorska.pl/chojnice.xml'),
969- # (u'Ciechocinek', u'http://www.pomorska.pl/ciechocinek.xml'),
970- # (u'Golub Dobrzy\u0144', u'http://www.pomorska.pl/golubdobrzyn.xml'),
971- # (u'Mogilno', u'http://www.pomorska.pl/mogilno.xml'),
972- # (u'Radziej\u00f3w', u'http://www.pomorska.pl/radziejow.xml'),
973- # (u'Rypin', u'http://www.pomorska.pl/rypin.xml'),
974- # (u'S\u0119p\u00f3lno', u'http://www.pomorska.pl/sepolno.xml'),
975- # (u'\u015awiecie', u'http://www.pomorska.pl/swiecie.xml'),
976- # (u'Tuchola', u'http://www.pomorska.pl/tuchola.xml'),
977- # (u'\u017bnin', u'http://www.pomorska.pl/znin.xml')
978-
979- # # wiadomosci tematyczne (redundancja z region/miasta):
980- # (u'Sport', u'http://www.pomorska.pl/sport.xml'),
981- # (u'Zdrowie', u'http://www.pomorska.pl/zdrowie.xml'),
982- # (u'Auto', u'http://www.pomorska.pl/moto.xml'),
983- # (u'Dom', u'http://www.pomorska.pl/dom.xml'),
984- # (u'Reporta\u017c', u'http://www.pomorska.pl/reportaz.xml'),
985- # (u'Gospodarka', u'http://www.pomorska.pl/gospodarka.xml')
986- ]
987-
988- keep_only_tags = [dict(name='div', attrs={'id':'article'})]
989-
990- remove_tags = [
991- dict(name='p', attrs={'id':'articleTags'}),
992- dict(name='div', attrs={'id':'articleEpaper'}),
993- dict(name='div', attrs={'id':'articleConnections'}),
994- dict(name='div', attrs={'class':'articleFacts'}),
995- dict(name='div', attrs={'id':'articleExternalLink'}),
996- dict(name='div', attrs={'id':'articleMultimedia'}),
997- dict(name='div', attrs={'id':'articleGalleries'}),
998- dict(name='div', attrs={'id':'articleAlarm'}),
999- dict(name='div', attrs={'id':'adholder_srodek1'}),
1000- dict(name='div', attrs={'id':'articleVideo'}),
1001- dict(name='a', attrs={'name':'fb_share'})]
1002-
1003- extra_css = '''h1 { font-size: 1.4em; }
1004- h2 { font-size: 1.0em; }'''
1005+ ignore_duplicate_articles = {'title', 'url'}
1006+
1007+ preprocess_regexps = [(re.compile(ur'Czytaj:.*?</a>', re.DOTALL), lambda match: ''), (re.compile(ur'Przeczytaj także:.*?</a>', re.DOTALL|re.IGNORECASE), lambda match: ''),
1008+ (re.compile(ur'Przeczytaj również:.*?</a>', re.DOTALL|re.IGNORECASE), lambda match: ''), (re.compile(ur'Zobacz też:.*?</a>', re.DOTALL|re.IGNORECASE), lambda match: '')]
1009+
1010+ keep_only_tags = [dict(id=['article', 'cover', 'photostory'])]
1011+ remove_tags = [dict(id=['articleTags', 'articleMeta', 'boxReadIt', 'articleGalleries', 'articleConnections',
1012+ 'ForumArticleComments', 'articleRecommend', 'jedynkiLinks', 'articleGalleryConnections',
1013+ 'photostoryConnections', 'articleEpaper', 'articlePoll', 'articleAlarm', 'articleByline']),
1014+ dict(attrs={'class':'articleFunctions'})]
1015+
1016+ feeds = [(u'Wszystkie', u'http://www.pomorska.pl/rss.xml'),
1017+ (u'Region', u'http://www.pomorska.pl/region.xml'),
1018+ (u'Bydgoszcz', u'http://www.pomorska.pl/bydgoszcz.xml'),
1019+ (u'Nakło', u'http://www.pomorska.pl/naklo.xml'),
1020+ (u'Koronowo', u'http://www.pomorska.pl/koronowo.xml'),
1021+ (u'Solec Kujawski', u'http://www.pomorska.pl/soleckujawski.xml'),
1022+ (u'Grudziądz', u'http://www.pomorska.pl/grudziadz.xml'),
1023+ (u'Inowrocław', u'http://www.pomorska.pl/inowroclaw.xml'),
1024+ (u'Toruń', u'http://www.pomorska.pl/torun.xml'),
1025+ (u'Włocławek', u'http://www.pomorska.pl/wloclawek.xml'),
1026+ (u'Aleksandrów Kujawski', u'http://www.pomorska.pl/aleksandrow.xml'),
1027+ (u'Brodnica', u'http://www.pomorska.pl/brodnica.xml'),
1028+ (u'Chełmno', u'http://www.pomorska.pl/chelmno.xml'),
1029+ (u'Chojnice', u'http://www.pomorska.pl/chojnice.xml'),
1030+ (u'Ciechocinek', u'http://www.pomorska.pl/ciechocinek.xml'),
1031+ (u'Golub-Dobrzyń', u'http://www.pomorska.pl/golubdobrzyn.xml'),
1032+ (u'Mogilno', u'http://www.pomorska.pl/mogilno.xml'),
1033+ (u'Radziejów', u'http://www.pomorska.pl/radziejow.xml'),
1034+ (u'Rypin', u'http://www.pomorska.pl/rypin.xml'),
1035+ (u'Sępólno', u'http://www.pomorska.pl/sepolno.xml'),
1036+ (u'Świecie', u'http://www.pomorska.pl/swiecie.xml'),
1037+ (u'Tuchola', u'http://www.pomorska.pl/tuchola.xml'),
1038+ (u'Żnin', u'http://www.pomorska.pl/znin.xml'),
1039+ (u'Sport', u'http://www.pomorska.pl/sport.xml'),
1040+ (u'Zdrowie', u'http://www.pomorska.pl/zdrowie.xml'),
1041+ (u'Auto', u'http://www.pomorska.pl/moto.xml'),
1042+ (u'Dom', u'http://www.pomorska.pl/dom.xml'),
1043+ #(u'Reporta\u017c', u'http://www.pomorska.pl/reportaz.xml'),
1044+ (u'Gospodarka', u'http://www.pomorska.pl/gospodarka.xml')]
1045+
def get_cover_url(self):
    """Scrape the cover image URL, store it on ``self.cover_url`` and return it.

    Follows the first link inside the element with id ``covers`` on the
    'JEDYNKI' section page, then takes the ``src`` of the ``<img>`` inside
    the element with id ``cover`` on the linked page. Both values are
    prefixed with ``self.INDEX``. Raises if the expected markup is missing.
    """
    index_soup = self.index_to_soup(self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI')
    nexturl = self.INDEX + index_soup.find(id='covers').find('a')['href']
    issue_soup = self.index_to_soup(nexturl)
    self.cover_url = self.INDEX + issue_soup.find(id='cover').find(name='img')['src']
    # The attribute was assigned on the line above, so return it directly
    # instead of going through a getattr() whose default is the same value.
    return self.cover_url
1052+
def append_page(self, soup, appendtag):
    """Append the remaining pages of a multi-page photo story.

    When ``soup`` contains a ``span`` with class ``photoNavigationPages``,
    the page count is read from the text after its last ``/``, the
    ``photoNavigation`` elements are removed from ``appendtag``, and for
    each page from 2 up to that count the ``photoContainer``, ``photoMeta``
    and ``photoStoryText`` elements that exist are inserted at the end of
    ``appendtag``.

    Nothing is changed when there is no pager, or when the pager text or
    the ``photoNavigationNext`` link cannot be used (the original code
    raised in that case).

    :param soup: parsed first page of the article.
    :param appendtag: tag that receives the extra pages' content.
    """
    tag = soup.find('span', attrs={'class':'photoNavigationPages'})
    if not tag:
        return
    next_link = soup.find(attrs={'class':'photoNavigationNext'})
    try:
        number = int(tag.string.rpartition('/')[-1].replace('&nbsp;', ''))
        # Drop the last character of the "next" href; the page number is
        # appended in its place below.
        baseurl = self.INDEX + next_link['href'][:-1]
    except (AttributeError, KeyError, TypeError, ValueError):
        # Pager markup is not in the expected shape: keep page one only.
        return

    for nav in appendtag.findAll(attrs={'class':'photoNavigation'}):
        nav.extract()

    # Looked up in this order on every page, exactly as before.
    wanted = (
        {'id': 'photoContainer'},
        {'attrs': {'class':'photoMeta'}},
        {'attrs': {'class':'photoStoryText'}},
    )
    for nr in range(2, number + 1):
        soup2 = self.index_to_soup(baseurl + str(nr))
        for query in wanted:
            pagetext = soup2.find(**query)
            if pagetext:
                appendtag.insert(len(appendtag.contents), pagetext)
1075+
def preprocess_html(self, soup):
    """Run ``append_page`` on the document body and return the soup.

    :param soup: parsed article page.
    :return: the same ``soup`` object that was passed in; ``append_page``
        receives ``soup.body`` as the tag to append to.
    """
    self.append_page(soup, soup.body)
    return soup
1079
1080=== added file 'recipes/gazeta_wroclawska.recipe'
1081--- recipes/gazeta_wroclawska.recipe 1970-01-01 00:00:00 +0000
1082+++ recipes/gazeta_wroclawska.recipe 2013-03-06 19:41:20 +0000
1083@@ -0,0 +1,34 @@
1084+from calibre.web.feeds.news import BasicNewsRecipe
1085+
class GazetaWroclawska(BasicNewsRecipe):
    """News recipe for the Polish regional daily Gazeta Wroclawska.

    Articles are fetched in their print version; the cover image is scraped
    from prasa24.pl.
    """

    title = u'Gazeta Wroc\u0142awska'
    __author__ = 'fenuks'
    description = u'Gazeta Regionalna Gazeta Wrocławska. Najnowsze Wiadomości Wrocław, Informacje Wrocław. Czytaj!'
    category = 'newspaper'
    language = 'pl'
    encoding = 'iso-8859-2'
    masthead_url = 'http://s.polskatimes.pl/g/logo_naglowek/gazetawroclawska.png?24'
    oldest_article = 7
    max_articles_per_feed = 100
    remove_empty_feeds = True
    no_stylesheets = True
    use_embedded_content = False
    ignore_duplicate_articles = {'title', 'url'}
    remove_tags_after = dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})
    remove_tags = [
        dict(id='mat-podobne'),
        dict(name='a', attrs={'class':'czytajDalej'}),
        dict(attrs={'src':'http://nm.dz.com.pl/dz.png'}),
    ]

    feeds = [
        (u'Fakty24', u'http://gazetawroclawska.feedsportal.com/c/32980/f/533775/index.rss?201302'),
        (u'Region', u'http://www.gazetawroclawska.pl/rss/gazetawroclawska_region.xml?201302'),
        (u'Kultura', u'http://gazetawroclawska.feedsportal.com/c/32980/f/533777/index.rss?201302'),
        (u'Sport', u'http://gazetawroclawska.feedsportal.com/c/32980/f/533776/index.rss?201302'),
        (u'Z archiwum', u'http://www.gazetawroclawska.pl/rss/gazetawroclawska_zarchiwum.xml?201302'),
        (u'M\xf3j reporter', u'http://www.gazetawroclawska.pl/rss/gazetawroclawska_mojreporter.xml?201302'),
        (u'Historia', u'http://www.gazetawroclawska.pl/rss/gazetawroclawska_historia.xml?201302'),
        (u'Listy do redakcji', u'http://www.gazetawroclawska.pl/rss/gazetawroclawska_listydoredakcji.xml?201302'),
        (u'Na drogach', u'http://www.gazetawroclawska.pl/rss/gazetawroclawska_nadrogach.xml?201302'),
    ]

    def print_version(self, url):
        """Return ``url`` with every 'artykul' replaced by 'drukuj'."""
        return url.replace('artykul', 'drukuj')

    def skip_ad_pages(self, soup):
        """Follow the first link of an 'Advertisement' page.

        :param soup: parsed page that was just downloaded.
        :return: ``self.index_to_soup(href, raw=True)`` for the first ``<a>``
            on the page when the title test matches; ``None`` otherwise, and
            also when the page has no ``<title>`` or no usable link (the
            original code raised ``TypeError`` in those cases).
        """
        page_title = soup.title
        # The membership test itself is kept exactly as originally written.
        if page_title is None or 'Advertisement' not in page_title:
            return None
        link = soup.find('a')
        nexturl = link.get('href') if link is not None else None
        if not nexturl:
            return None
        return self.index_to_soup(nexturl, raw=True)

    def get_cover_url(self):
        """Scrape the cover image URL, store it on ``self.cover_url`` and return it.

        The URL is the ``src`` of the first ``<img>`` inside the element with
        id ``pojemnik`` on the prasa24.pl page. Raises if that markup is
        missing.
        """
        soup = self.index_to_soup('http://www.prasa24.pl/gazeta/gazeta-wroclawska/')
        self.cover_url = soup.find(id='pojemnik').img['src']
        return self.cover_url
1118
1119=== added file 'recipes/gazeta_wspolczesna.recipe'
1120--- recipes/gazeta_wspolczesna.recipe 1970-01-01 00:00:00 +0000
1121+++ recipes/gazeta_wspolczesna.recipe 2013-03-06 19:41:20 +0000
1122@@ -0,0 +1,63 @@
1123+import re
1124+from calibre.web.feeds.news import BasicNewsRecipe
1125+
1126+class GazetaWspolczesna(BasicNewsRecipe):
1127+ title = u'Gazeta Wsp\xf3\u0142czesna'
1128+ __author__ = 'fenuks'
1129+ description = u'Gazeta Współczesna - portal regionalny.'
1130+ category = 'newspaper'
1131+ language = 'pl'
1132+ encoding = 'iso-8859-2'
1133+ extra_css = 'ul {list-style: none; padding:0; margin:0;}'
1134+ INDEX = 'http://www.wspolczesna.pl'
1135+ masthead_url = INDEX + '/images/top_logo.png'
1136+ oldest_article = 7
1137+ max_articles_per_feed = 100
1138+ remove_empty_feeds = True
1139+ no_stylesheets = True
1140+ ignore_duplicate_articles = {'title', 'url'}
1141+
1142+ preprocess_regexps = [(re.compile(ur'Czytaj:.*?</a>', re.DOTALL), lambda match: ''), (re.compile(ur'Przeczytaj także:.*?</a>', re.DOTALL|re.IGNORECASE), lambda match: ''),
1143+ (re.compile(ur'Przeczytaj również:.*?</a>', re.DOTALL|re.IGNORECASE), lambda match: ''), (re.compile(ur'Zobacz też:.*?</a>', re.DOTALL|re.IGNORECASE), lambda match: '')]
1144+
1145+ keep_only_tags = [dict(id=['article', 'cover', 'photostory'])]
1146+ remove_tags = [dict(id=['articleTags', 'articleMeta', 'boxReadIt', 'articleGalleries', 'articleConnections',
1147+ 'ForumArticleComments', 'articleRecommend', 'jedynkiLinks', 'articleGalleryConnections',
1148+ 'photostoryConnections', 'articleEpaper', 'articlePoll', 'articleAlarm', 'articleByline']),
1149+ dict(attrs={'class':'articleFunctions'})]
1150+
1151+ feeds = [(u'Wszystkie', u'http://www.wspolczesna.pl/rss.xml'), (u'August\xf3w', u'http://www.wspolczesna.pl/augustow.xml'), (u'Bia\u0142ystok', u'http://www.wspolczesna.pl/bialystok.xml'), (u'Bielsk Podlaski', u'http://www.wspolczesna.pl/bielsk.xml'), (u'E\u0142k', u'http://www.wspolczesna.pl/elk.xml'), (u'Grajewo', u'http://www.wspolczesna.pl/grajewo.xml'), (u'Go\u0142dap', u'http://www.wspolczesna.pl/goldap.xml'), (u'Hajn\xf3wka', u'http://www.wspolczesna.pl/hajnowka.xml'), (u'Kolno', u'http://www.wspolczesna.pl/kolno.xml'), (u'\u0141om\u017ca', u'http://www.wspolczesna.pl/lomza.xml'), (u'Mo\u0144ki', u'http://www.wspolczesna.pl/monki.xml'), (u'Olecko', u'http://www.wspolczesna.pl/olecko.xml'), (u'Ostro\u0142\u0119ka', u'http://www.wspolczesna.pl/ostroleka.xml'), (u'Powiat Bia\u0142ostocki', u'http://www.wspolczesna.pl/powiat.xml'), (u'Sejny', u'http://www.wspolczesna.pl/sejny.xml'), (u'Siemiatycze', u'http://www.wspolczesna.pl/siemiatycze.xml'), (u'Sok\xf3\u0142ka', u'http://www.wspolczesna.pl/sokolka.xml'), (u'Suwa\u0142ki', u'http://www.wspolczesna.pl/suwalki.xml'), (u'Wysokie Mazowieckie', u'http://www.wspolczesna.pl/wysokie.xml'), (u'Zambr\xf3w', u'http://www.wspolczesna.pl/zambrow.xml'), (u'Sport', u'http://www.wspolczesna.pl/sport.xml'), (u'Praca', u'http://www.wspolczesna.pl/praca.xml'), (u'Dom', u'http://www.wspolczesna.pl/dom.xml'), (u'Auto', u'http://www.wspolczesna.pl/auto.xml'), (u'Zdrowie', u'http://www.wspolczesna.pl/zdrowie.xml')]
1152+
1153+ def get_cover_url(self):
1154+ soup = self.index_to_soup(self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI')
1155+ nexturl = self.INDEX + soup.find(id='covers').find('a')['href']
1156+ soup = self.index_to_soup(nexturl)
1157+ self.cover_url = self.INDEX + soup.find(id='cover').find(name='img')['src']
1158+ return getattr(self, 'cover_url', self.cover_url)
1159+
1160+ def append_page(self, soup, appendtag):
1161+ tag = soup.find('span', attrs={'class':'photoNavigationPages'})
1162+ if tag:
1163+ number = int(tag.string.rpartition('/')[-1].replace('&nbsp;', ''))
1164+ baseurl = self.INDEX + soup.find(attrs={'class':'photoNavigationNext'})['href'][:-1]
1165+
1166+ for r in appendtag.findAll(attrs={'class':'photoNavigation'}):
1167+ r.extract()
1168+ for nr in range(2, number+1):
1169+ soup2 = self.index_to_soup(baseurl + str(nr))
1170+ pagetext = soup2.find(id='photoContainer')
1171+ if pagetext:
1172+ pos = len(appendtag.contents)
1173+ appendtag.insert(pos, pagetext)
1174+ pagetext = soup2.find(attrs={'class':'photoMeta'})
1175+ if pagetext:
1176+ pos = len(appendtag.contents)
1177+ appendtag.insert(pos, pagetext)
1178+ pagetext = soup2.find(attrs={'class':'photoStoryText'})
1179+ if pagetext:
1180+ pos = len(appendtag.contents)
1181+ appendtag.insert(pos, pagetext)
1182+
1183+ def preprocess_html(self, soup):
1184+ self.append_page(soup, soup.body)
1185+ return soup
1186
1187=== modified file 'recipes/gazeta_wyborcza.recipe'
1188--- recipes/gazeta_wyborcza.recipe 2012-11-11 12:08:23 +0000
1189+++ recipes/gazeta_wyborcza.recipe 2013-03-06 19:41:20 +0000
1190@@ -6,7 +6,7 @@
1191 title = u'Gazeta.pl'
1192 __author__ = 'fenuks, Artur Stachecki'
1193 language = 'pl'
1194- description = 'news from gazeta.pl'
1195+ description = 'Wiadomości z Polski i ze świata. Serwisy tematyczne i lokalne w 20 miastach.'
1196 category = 'newspaper'
1197 publication_type = 'newspaper'
1198 masthead_url = 'http://bi.gazeta.pl/im/5/10285/z10285445AA.jpg'
1199
1200=== added file 'recipes/gcn.recipe'
1201--- recipes/gcn.recipe 1970-01-01 00:00:00 +0000
1202+++ recipes/gcn.recipe 2013-03-06 19:41:20 +0000
1203@@ -0,0 +1,83 @@
1204+import re
1205+from calibre.web.feeds.news import BasicNewsRecipe
1206+
1207+class GCN(BasicNewsRecipe):
1208+ title = u'Gazeta Codziennej Nowiny'
1209+ __author__ = 'fenuks'
1210+ description = u'nowiny24.pl - portal regionalny województwa podkarpackiego.'
1211+ category = 'newspaper'
1212+ language = 'pl'
1213+ encoding = 'iso-8859-2'
1214+ extra_css = 'ul {list-style: none; padding:0; margin:0;}'
1215+ INDEX = 'http://www.nowiny24.pl'
1216+ masthead_url = INDEX + '/images/top_logo.png'
1217+ oldest_article = 7
1218+ max_articles_per_feed = 100
1219+ remove_empty_feeds = True
1220+ no_stylesheets = True
1221+ ignore_duplicate_articles = {'title', 'url'}
1222+
1223+ preprocess_regexps = [(re.compile(ur'Czytaj:.*?</a>', re.DOTALL), lambda match: ''), (re.compile(ur'Przeczytaj także:.*?</a>', re.DOTALL|re.IGNORECASE), lambda match: ''),
1224+ (re.compile(ur'Przeczytaj również:.*?</a>', re.DOTALL|re.IGNORECASE), lambda match: ''), (re.compile(ur'Zobacz też:.*?</a>', re.DOTALL|re.IGNORECASE), lambda match: '')]
1225+
1226+ keep_only_tags = [dict(id=['article', 'cover', 'photostory'])]
1227+ remove_tags = [dict(id=['articleTags', 'articleMeta', 'boxReadIt', 'articleGalleries', 'articleConnections',
1228+ 'ForumArticleComments', 'articleRecommend', 'jedynkiLinks', 'articleGalleryConnections',
1229+ 'photostoryConnections', 'articleEpaper', 'articlePoll', 'articleAlarm', 'articleByline']),
1230+ dict(attrs={'class':'articleFunctions'})]
1231+
1232+ feeds = [(u'Wszystkie', u'http://www.nowiny24.pl/rss.xml'),
1233+ (u'Podkarpacie', u'http://www.nowiny24.pl/podkarpacie.xml'),
1234+ (u'Bieszczady', u'http://www.nowiny24.pl/bieszczady.xml'),
1235+ (u'Rzeszów', u'http://www.nowiny24.pl/rzeszow.xml'),
1236+ (u'Przemyśl', u'http://www.nowiny24.pl/przemysl.xml'),
1237+ (u'Leżajsk', u'http://www.nowiny24.pl/lezajsk.xml'),
1238+ (u'Łańcut', u'http://www.nowiny24.pl/lancut.xml'),
1239+ (u'Dębica', u'http://www.nowiny24.pl/debica.xml'),
1240+ (u'Jarosław', u'http://www.nowiny24.pl/jaroslaw.xml'),
1241+ (u'Krosno', u'http://www.nowiny24.pl/krosno.xml'),
1242+ (u'Mielec', u'http://www.nowiny24.pl/mielec.xml'),
1243+ (u'Nisko', u'http://www.nowiny24.pl/nisko.xml'),
1244+ (u'Sanok', u'http://www.nowiny24.pl/sanok.xml'),
1245+ (u'Stalowa Wola', u'http://www.nowiny24.pl/stalowawola.xml'),
1246+ (u'Tarnobrzeg', u'http://www.nowiny24.pl/tarnobrzeg.xml'),
1247+ (u'Sport', u'http://www.nowiny24.pl/sport.xml'),
1248+ (u'Dom', u'http://www.nowiny24.pl/dom.xml'),
1249+ (u'Auto', u'http://www.nowiny24.pl/auto.xml'),
1250+ (u'Praca', u'http://www.nowiny24.pl/praca.xml'),
1251+ (u'Zdrowie', u'http://www.nowiny24.pl/zdrowie.xml'),
1252+ (u'Wywiady', u'http://www.nowiny24.pl/wywiady.xml')]
1253+
1254+ def get_cover_url(self):
1255+ soup = self.index_to_soup(self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI')
1256+ nexturl = self.INDEX + soup.find(id='covers').find('a')['href']
1257+ soup = self.index_to_soup(nexturl)
1258+ self.cover_url = self.INDEX + soup.find(id='cover').find(name='img')['src']
1259+ return getattr(self, 'cover_url', self.cover_url)
1260+
1261+ def append_page(self, soup, appendtag):
1262+ tag = soup.find('span', attrs={'class':'photoNavigationPages'})
1263+ if tag:
1264+ number = int(tag.string.rpartition('/')[-1].replace('&nbsp;', ''))
1265+ baseurl = self.INDEX + soup.find(attrs={'class':'photoNavigationNext'})['href'][:-1]
1266+
1267+ for r in appendtag.findAll(attrs={'class':'photoNavigation'}):
1268+ r.extract()
1269+ for nr in range(2, number+1):
1270+ soup2 = self.index_to_soup(baseurl + str(nr))
1271+ pagetext = soup2.find(id='photoContainer')
1272+ if pagetext:
1273+ pos = len(appendtag.contents)
1274+ appendtag.insert(pos, pagetext)
1275+ pagetext = soup2.find(attrs={'class':'photoMeta'})
1276+ if pagetext:
1277+ pos = len(appendtag.contents)
1278+ appendtag.insert(pos, pagetext)
1279+ pagetext = soup2.find(attrs={'class':'photoStoryText'})
1280+ if pagetext:
1281+ pos = len(appendtag.contents)
1282+ appendtag.insert(pos, pagetext)
1283+
1284+ def preprocess_html(self, soup):
1285+ self.append_page(soup, soup.body)
1286+ return soup
1287
1288=== added file 'recipes/glos_wielkopolski.recipe'
1289--- recipes/glos_wielkopolski.recipe 1970-01-01 00:00:00 +0000
1290+++ recipes/glos_wielkopolski.recipe 2013-03-06 19:41:20 +0000
1291@@ -0,0 +1,34 @@
1292+from calibre.web.feeds.news import BasicNewsRecipe
1293+
1294+class GlosWielkopolski(BasicNewsRecipe):
1295+ title = u'G\u0142os Wielkopolski'
1296+ __author__ = 'fenuks'
1297+ description = u'Gazeta Regionalna Głos Wielkopolski. Najnowsze Wiadomości Poznań. Czytaj Informacje Poznań!'
1298+ category = 'newspaper'
1299+ language = 'pl'
1300+ encoding = 'iso-8859-2'
1301+ masthead_url = 'http://s.polskatimes.pl/g/logo_naglowek/gloswielkopolski.png?24'
1302+ oldest_article = 7
1303+ max_articles_per_feed = 100
1304+ remove_empty_feeds= True
1305+ no_stylesheets = True
1306+ use_embedded_content = False
1307+ ignore_duplicate_articles = {'title', 'url'}
1308+ #preprocess_regexps = [(re.compile(ur'<b>Czytaj także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur',<b>Czytaj też:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>Zobacz także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<center><h4><a.*?</a></h4></center>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TEŻ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ WIĘCEJ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TAKŻE:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>\* CZYTAJ KONIECZNIE:.*', re.DOTALL), lambda match: '</body>'), (re.compile(ur'<b>Nasze serwisy:</b>.*', re.DOTALL), lambda match: '</body>') ]
1309+ remove_tags_after= dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})
1310+ remove_tags=[dict(id='mat-podobne'), dict(name='a', attrs={'class':'czytajDalej'}), dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})]
1311+
1312+ feeds = [(u'Wszystkie', u'http://gloswielkopolski.feedsportal.com/c/32980/f/533779/index.rss?201302'), (u'Wiadomo\u015bci', u'http://gloswielkopolski.feedsportal.com/c/32980/f/533780/index.rss?201302'), (u'Sport', u'http://gloswielkopolski.feedsportal.com/c/32980/f/533781/index.rss?201302'), (u'Kultura', u'http://gloswielkopolski.feedsportal.com/c/32980/f/533782/index.rss?201302'), (u'Porady', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_porady.xml?201302'), (u'Blogi', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_blogi.xml?201302'), (u'Nasze akcje', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_naszeakcje.xml?201302'), (u'Opinie', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_opinie.xml?201302'), (u'Magazyn', u'http://www.gloswielkopolski.pl/rss/gloswielkopolski_magazyn.xml?201302')]
1313+
1314+ def print_version(self, url):
1315+ return url.replace('artykul', 'drukuj')
1316+
1317+ def skip_ad_pages(self, soup):
1318+ if 'Advertisement' in soup.title:
1319+ nexturl=soup.find('a')['href']
1320+ return self.index_to_soup(nexturl, raw=True)
1321+
1322+ def get_cover_url(self):
1323+ soup = self.index_to_soup('http://www.prasa24.pl/gazeta/glos-wielkopolski/')
1324+ self.cover_url=soup.find(id='pojemnik').img['src']
1325+ return getattr(self, 'cover_url', self.cover_url)
1326
1327=== modified file 'recipes/gram_pl.recipe'
1328--- recipes/gram_pl.recipe 2012-12-14 09:32:01 +0000
1329+++ recipes/gram_pl.recipe 2013-03-06 19:41:20 +0000
1330@@ -11,15 +11,14 @@
1331 max_articles_per_feed = 100
1332 ignore_duplicate_articles = {'title', 'url'}
1333 no_stylesheets= True
1334+ remove_empty_feeds = True
1335 #extra_css = 'h2 {font-style: italic; font-size:20px;} .picbox div {float: left;}'
1336 cover_url=u'http://www.gram.pl/www/01/img/grampl_zima.png'
1337 keep_only_tags= [dict(id='articleModule')]
1338- remove_tags = [dict(attrs={'class':['breadCrump', 'dymek', 'articleFooter']})]
1339+ remove_tags = [dict(attrs={'class':['breadCrump', 'dymek', 'articleFooter', 'twitter-share-button']})]
1340 feeds = [(u'Informacje', u'http://www.gram.pl/feed_news.asp'),
1341- (u'Publikacje', u'http://www.gram.pl/feed_news.asp?type=articles'),
1342- (u'Kolektyw- Indie Games', u'http://indie.gram.pl/feed/'),
1343- #(u'Kolektyw- Moto Games', u'http://www.motogames.gram.pl/news.rss')
1344- ]
1345+ (u'Publikacje', u'http://www.gram.pl/feed_news.asp?type=articles')
1346+ ]
1347
1348 def parse_feeds (self):
1349 feeds = BasicNewsRecipe.parse_feeds(self)
1350
1351=== modified file 'recipes/gry_online_pl.recipe'
1352--- recipes/gry_online_pl.recipe 2012-10-17 14:12:08 +0000
1353+++ recipes/gry_online_pl.recipe 2013-03-06 19:41:20 +0000
1354@@ -1,20 +1,23 @@
1355+import time
1356 from calibre.web.feeds.recipes import BasicNewsRecipe
1357
1358 class GryOnlinePl(BasicNewsRecipe):
1359 title = u'Gry-Online.pl'
1360 __author__ = 'fenuks'
1361- description = 'Gry-Online.pl - computer games'
1362+ description = u'Wiadomości o grach, recenzje, zapowiedzi. Encyklopedia Gier zawiera opisy gier na PC, konsole Xbox360, PS3 i inne platformy.'
1363 category = 'games'
1364 language = 'pl'
1365 oldest_article = 13
1366- INDEX= 'http://www.gry-online.pl/'
1367- masthead_url='http://www.gry-online.pl/im/gry-online-logo.png'
1368- cover_url='http://www.gry-online.pl/im/gry-online-logo.png'
1369+ INDEX = 'http://www.gry-online.pl/'
1370+ masthead_url = 'http://www.gry-online.pl/im/gry-online-logo.png'
1371+ cover_url = 'http://www.gry-online.pl/im/gry-online-logo.png'
1372 max_articles_per_feed = 100
1373- no_stylesheets= True
1374- keep_only_tags=[dict(name='div', attrs={'class':['gc660', 'gc660 S013']})]
1375- remove_tags=[dict({'class':['nav-social', 'add-info', 'smlb', 'lista lista3 lista-gry', 'S013po', 'S013-npb', 'zm_gfx_cnt_bottom', 'ocen-txt', 'wiecej-txt', 'wiecej-txt2']})]
1376- feeds = [(u'Newsy', 'http://www.gry-online.pl/rss/news.xml'), ('Teksty', u'http://www.gry-online.pl/rss/teksty.xml')]
1377+ no_stylesheets = True
1378+ keep_only_tags = [dict(name='div', attrs={'class':['gc660', 'gc660 S013', 'news_endpage_tit', 'news_container', 'news']})]
1379+ remove_tags = [dict({'class':['nav-social', 'add-info', 'smlb', 'lista lista3 lista-gry', 'S013po', 'S013-npb', 'zm_gfx_cnt_bottom', 'ocen-txt', 'wiecej-txt', 'wiecej-txt2']})]
1380+ feeds = [
1381+ (u'Newsy', 'http://www.gry-online.pl/rss/news.xml'),
1382+ ('Teksty', u'http://www.gry-online.pl/rss/teksty.xml')]
1383
1384
1385 def append_page(self, soup, appendtag):
1386@@ -24,7 +27,14 @@
1387 url_part = soup.find('link', attrs={'rel':'canonical'})['href']
1388 url_part = url_part[25:].rpartition('?')[0]
1389 for nexturl in nexturls[1:-1]:
1390- soup2 = self.index_to_soup('http://www.gry-online.pl/' + url_part + nexturl['href'])
1391+ finalurl = 'http://www.gry-online.pl/' + url_part + nexturl['href']
1392+ for i in range(10):
1393+ try:
1394+ soup2 = self.index_to_soup(finalurl)
1395+ break
1396+ except:
1397+ print 'retrying in 0.5s'
1398+ time.sleep(0.5)
1399 pagetext = soup2.find(attrs={'class':'gc660'})
1400 for r in pagetext.findAll(name='header'):
1401 r.extract()
1402@@ -34,7 +44,42 @@
1403 appendtag.insert(pos, pagetext)
1404 for r in appendtag.findAll(attrs={'class':['n5p', 'add-info', 'twitter-share-button', 'lista lista3 lista-gry']}):
1405 r.extract()
1406+ else:
1407+ tag = appendtag.find('div', attrs={'class':'S018stronyr'})
1408+ if tag:
1409+ nexturl = tag.a
1410+ url_part = soup.find('link', attrs={'rel':'canonical'})['href']
1411+ url_part = url_part[25:].rpartition('?')[0]
1412+ while tag:
1413+ end = tag.find(attrs={'class':'right left-dead'})
1414+ if end:
1415+ break
1416+ else:
1417+ nexturl = tag.a
1418+ finalurl = 'http://www.gry-online.pl/' + url_part + nexturl['href']
1419+ for i in range(10):
1420+ try:
1421+ soup2 = self.index_to_soup(finalurl)
1422+ break
1423+ except:
1424+ print 'retrying in 0.5s'
1425+ time.sleep(0.5)
1426+ tag = soup2.find('div', attrs={'class':'S018stronyr'})
1427+ pagetext = soup2.find(attrs={'class':'gc660'})
1428+ for r in pagetext.findAll(name='header'):
1429+ r.extract()
1430+ for r in pagetext.findAll(attrs={'itemprop':'description'}):
1431+ r.extract()
1432+ pos = len(appendtag.contents)
1433+ appendtag.insert(pos, pagetext)
1434+ for r in appendtag.findAll(attrs={'class':['n5p', 'add-info', 'twitter-share-button', 'lista lista3 lista-gry', 'S018strony']}):
1435+ r.extract()
1436
1437+ def image_url_processor(self, baseurl, url):
1438+ if url.startswith('..'):
1439+ return url[2:]
1440+ else:
1441+ return url
1442
1443 def preprocess_html(self, soup):
1444 self.append_page(soup, soup.body)
1445
1446=== added file 'recipes/icons/biweekly.png'
1447Binary files recipes/icons/biweekly.png 1970-01-01 00:00:00 +0000 and recipes/icons/biweekly.png 2013-03-06 19:41:20 +0000 differ
1448=== added file 'recipes/icons/dwutygodnik.png'
1449Binary files recipes/icons/dwutygodnik.png 1970-01-01 00:00:00 +0000 and recipes/icons/dwutygodnik.png 2013-03-06 19:41:20 +0000 differ
1450=== added file 'recipes/icons/dziennik_baltycki.png'
1451Binary files recipes/icons/dziennik_baltycki.png 1970-01-01 00:00:00 +0000 and recipes/icons/dziennik_baltycki.png 2013-03-06 19:41:20 +0000 differ
1452=== added file 'recipes/icons/dziennik_lodzki.png'
1453Binary files recipes/icons/dziennik_lodzki.png 1970-01-01 00:00:00 +0000 and recipes/icons/dziennik_lodzki.png 2013-03-06 19:41:20 +0000 differ
1454=== added file 'recipes/icons/dziennik_wschodni.png'
1455Binary files recipes/icons/dziennik_wschodni.png 1970-01-01 00:00:00 +0000 and recipes/icons/dziennik_wschodni.png 2013-03-06 19:41:20 +0000 differ
1456=== added file 'recipes/icons/dziennik_zachodni.png'
1457Binary files recipes/icons/dziennik_zachodni.png 1970-01-01 00:00:00 +0000 and recipes/icons/dziennik_zachodni.png 2013-03-06 19:41:20 +0000 differ
1458=== added file 'recipes/icons/echo_dnia.png'
1459Binary files recipes/icons/echo_dnia.png 1970-01-01 00:00:00 +0000 and recipes/icons/echo_dnia.png 2013-03-06 19:41:20 +0000 differ
1460=== added file 'recipes/icons/emuzica_pl.png'
1461Binary files recipes/icons/emuzica_pl.png 1970-01-01 00:00:00 +0000 and recipes/icons/emuzica_pl.png 2013-03-06 19:41:20 +0000 differ
1462=== added file 'recipes/icons/film_org_pl.png'
1463Binary files recipes/icons/film_org_pl.png 1970-01-01 00:00:00 +0000 and recipes/icons/film_org_pl.png 2013-03-06 19:41:20 +0000 differ
1464=== added file 'recipes/icons/gazeta_krakowska.png'
1465Binary files recipes/icons/gazeta_krakowska.png 1970-01-01 00:00:00 +0000 and recipes/icons/gazeta_krakowska.png 2013-03-06 19:41:20 +0000 differ
1466=== added file 'recipes/icons/gazeta_lubuska.png'
1467Binary files recipes/icons/gazeta_lubuska.png 1970-01-01 00:00:00 +0000 and recipes/icons/gazeta_lubuska.png 2013-03-06 19:41:20 +0000 differ
1468=== added file 'recipes/icons/gazeta_wroclawska.png'
1469Binary files recipes/icons/gazeta_wroclawska.png 1970-01-01 00:00:00 +0000 and recipes/icons/gazeta_wroclawska.png 2013-03-06 19:41:20 +0000 differ
1470=== added file 'recipes/icons/gazeta_wspolczesna.png'
1471Binary files recipes/icons/gazeta_wspolczesna.png 1970-01-01 00:00:00 +0000 and recipes/icons/gazeta_wspolczesna.png 2013-03-06 19:41:20 +0000 differ
1472=== added file 'recipes/icons/gcn.png'
1473Binary files recipes/icons/gcn.png 1970-01-01 00:00:00 +0000 and recipes/icons/gcn.png 2013-03-06 19:41:20 +0000 differ
1474=== added file 'recipes/icons/glos_wielkopolski.png'
1475Binary files recipes/icons/glos_wielkopolski.png 1970-01-01 00:00:00 +0000 and recipes/icons/glos_wielkopolski.png 2013-03-06 19:41:20 +0000 differ
1476=== added file 'recipes/icons/kurier_lubelski.png'
1477Binary files recipes/icons/kurier_lubelski.png 1970-01-01 00:00:00 +0000 and recipes/icons/kurier_lubelski.png 2013-03-06 19:41:20 +0000 differ
1478=== added file 'recipes/icons/kurier_poranny.png'
1479Binary files recipes/icons/kurier_poranny.png 1970-01-01 00:00:00 +0000 and recipes/icons/kurier_poranny.png 2013-03-06 19:41:20 +0000 differ
1480=== added file 'recipes/icons/kurier_szczecinski.png'
1481Binary files recipes/icons/kurier_szczecinski.png 1970-01-01 00:00:00 +0000 and recipes/icons/kurier_szczecinski.png 2013-03-06 19:41:20 +0000 differ
1482=== added file 'recipes/icons/nowa_fantastyka.png'
1483Binary files recipes/icons/nowa_fantastyka.png 1970-01-01 00:00:00 +0000 and recipes/icons/nowa_fantastyka.png 2013-03-06 19:41:20 +0000 differ
1484=== added file 'recipes/icons/nto.png'
1485Binary files recipes/icons/nto.png 1970-01-01 00:00:00 +0000 and recipes/icons/nto.png 2013-03-06 19:41:20 +0000 differ
1486=== added file 'recipes/icons/tablety_pl.png'
1487Binary files recipes/icons/tablety_pl.png 1970-01-01 00:00:00 +0000 and recipes/icons/tablety_pl.png 2013-03-06 19:41:20 +0000 differ
1488=== added file 'recipes/icons/trojmiasto_pl.png'
1489Binary files recipes/icons/trojmiasto_pl.png 1970-01-01 00:00:00 +0000 and recipes/icons/trojmiasto_pl.png 2013-03-06 19:41:20 +0000 differ
1490=== added file 'recipes/icons/zycie_warszawy.png'
1491Binary files recipes/icons/zycie_warszawy.png 1970-01-01 00:00:00 +0000 and recipes/icons/zycie_warszawy.png 2013-03-06 19:41:20 +0000 differ
1492=== added file 'recipes/jazzpress.recipe'
1493--- recipes/jazzpress.recipe 1970-01-01 00:00:00 +0000
1494+++ recipes/jazzpress.recipe 2013-03-06 19:41:20 +0000
1495@@ -0,0 +1,50 @@
1496+#!/usr/bin/env python
1497+# -*- coding: utf-8 -*-
1498+
1499+__license__ = 'GPL v3'
1500+__copyright__ = u'Łukasz Grąbczewski 2011-2013'
1501+__version__ = '2.0'
1502+
1503+import re, zipfile, os
1504+from calibre.ptempfile import PersistentTemporaryFile
1505+from calibre.ebooks.conversion.cli import main
1506+
1507+class jazzpress(BasicNewsRecipe):
1508+ __author__ = u'Łukasz Grąbczewski'
1509+ title = 'JazzPRESS'
1510+ language = 'pl'
1511+ publisher = 'Fundacja Popularyzacji Muzyki Jazzowej EuroJAZZ'
1512+ publication_type = 'magazine'
1513+ description = u'Internetowa gazeta poświęcona muzyce improwizowanej'
1514+
1515+ conversion_options = {
1516+ 'authors' : 'Fundacja Popularyzacji Muzyki Jazzowej EuroJAZZ'
1517+ ,'publisher' : publisher
1518+ ,'language' : language
1519+ ,'preserve_cover_aspect_ratio': True
1520+ ,'remove_first_image': True
1521+ }
1522+
1523+ def build_index(self):
1524+ browser = self.get_browser()
1525+ rc = browser.open('http://radiojazz.fm/')
1526+
1527+ # find the link
1528+ epublink = browser.find_link(url_regex=re.compile('e_jazzpress\d\d\d\d\_epub'))
1529+
1530+ # download ebook
1531+ self.report_progress(0,_('Downloading ePUB'))
1532+ response = browser.follow_link(epublink)
1533+ book_file = PersistentTemporaryFile(suffix='.epub')
1534+ book_file.write(response.read())
1535+ book_file.close()
1536+
1537+ # convert
1538+ self.report_progress(0.2,_('Converting to OEB'))
1539+ oebdir = self.output_dir + '/INPUT/'
1540+ main(['ebook-convert', book_file.name, oebdir])
1541+
1542+ # feed calibre
1543+ index = os.path.join(oebdir, 'content.opf')
1544+
1545+ return index
1546
1547=== modified file 'recipes/konflikty_zbrojne.recipe'
1548--- recipes/konflikty_zbrojne.recipe 2012-10-17 14:12:08 +0000
1549+++ recipes/konflikty_zbrojne.recipe 2013-03-06 19:41:20 +0000
1550@@ -7,7 +7,7 @@
1551 __author__ = 'fenuks'
1552 cover_url = 'http://www.konflikty.pl/images/tapety_logo.jpg'
1553 language = 'pl'
1554- description ='military news'
1555+ description = u'Zbiór ciekawych artykułów historycznych, militarnych oraz recenzji książek, gier i filmów. Najświeższe informacje o lotnictwie, wojskach lądowych i polityce.'
1556 category='military, history'
1557 oldest_article = 7
1558 max_articles_per_feed = 100
1559
1560=== modified file 'recipes/kosmonauta_pl.recipe'
1561--- recipes/kosmonauta_pl.recipe 2012-12-14 09:32:01 +0000
1562+++ recipes/kosmonauta_pl.recipe 2013-03-06 19:41:20 +0000
1563@@ -7,7 +7,7 @@
1564 description = u'polskojęzyczny portal w całości dedykowany misjom kosmicznym i badaniom kosmosu.'
1565 category = 'astronomy'
1566 language = 'pl'
1567- cover_url='http://bi.gazeta.pl/im/4/10393/z10393414X,Kosmonauta-net.jpg'
1568+ cover_url = 'http://bi.gazeta.pl/im/4/10393/z10393414X,Kosmonauta-net.jpg'
1569 no_stylesheets = True
1570 INDEX = 'http://www.kosmonauta.net'
1571 oldest_article = 7
1572@@ -24,6 +24,5 @@
1573 href = a['href']
1574 if not href.startswith('http'):
1575 a['href'] = self.INDEX + href
1576- print '%%%%%%%%%%%%%%%%%%%%%%%%%', a['href']
1577 return soup
1578-
1579\ No newline at end of file
1580+
1581
1582=== added file 'recipes/kurier_lubelski.recipe'
1583--- recipes/kurier_lubelski.recipe 1970-01-01 00:00:00 +0000
1584+++ recipes/kurier_lubelski.recipe 2013-03-06 19:41:20 +0000
1585@@ -0,0 +1,34 @@
1586+from calibre.web.feeds.news import BasicNewsRecipe
1587+
1588+class KurierLubelski(BasicNewsRecipe):
1589+ title = u'Kurier Lubelski'
1590+ __author__ = 'fenuks'
1591+ description = u'Gazeta Regionalna Kurier Lubelski. Najnowsze Wiadomości Lublin. Czytaj Informacje Lublin!'
1592+ category = 'newspaper'
1593+ language = 'pl'
1594+ encoding = 'iso-8859-2'
1595+ masthead_url = 'http://s.polskatimes.pl/g/logo_naglowek/kurierlubelski.png?24'
1596+ oldest_article = 7
1597+ max_articles_per_feed = 100
1598+ remove_empty_feeds = True
1599+ no_stylesheets = True
1600+ use_embedded_content = False
1601+ ignore_duplicate_articles = {'title', 'url'}
1602+ #preprocess_regexps = [(re.compile(ur'<b>Czytaj także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur',<b>Czytaj też:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>Zobacz także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<center><h4><a.*?</a></h4></center>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TEŻ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ WIĘCEJ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TAKŻE:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>\* CZYTAJ KONIECZNIE:.*', re.DOTALL), lambda match: '</body>'), (re.compile(ur'<b>Nasze serwisy:</b>.*', re.DOTALL), lambda match: '</body>') ]
1603+ remove_tags_after= dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})
1604+ remove_tags=[dict(id='mat-podobne'), dict(name='a', attrs={'class':'czytajDalej'}), dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})]
1605+
1606+ feeds = [(u'Wiadomo\u015bci', u'http://kurierlubelski.feedsportal.com/c/32980/f/533785/index.rss?201302'), (u'Region', u'http://www.kurierlubelski.pl/rss/kurierlubelski_region.xml?201302'), (u'Sport', u'http://kurierlubelski.feedsportal.com/c/32980/f/533786/index.rss?201302'), (u'Kultura', u'http://kurierlubelski.feedsportal.com/c/32980/f/533787/index.rss?201302'), (u'Rozmaito\u015bci', u'http://www.kurierlubelski.pl/rss/kurierlubelski_rozmaitosci.xml?201302'), (u'Dom', u'http://www.kurierlubelski.pl/rss/kurierlubelski_dom.xml?201302'), (u'Serwisy', u'http://www.kurierlubelski.pl/rss/kurierlubelski_serwisy.xml?201302'), (u'Motofakty', u'http://www.kurierlubelski.pl/rss/kurierlubelski_motofakty.xml?201302'), (u'M\xf3j Reporter', u'http://www.kurierlubelski.pl/rss/kurierlubelski_mojreporter.xml?201302'), (u'Praca', u'http://www.kurierlubelski.pl/rss/kurierlubelski_praca.xml?201302')]
1607+
1608+ def print_version(self, url):
1609+ return url.replace('artykul', 'drukuj')
1610+
1611+ def skip_ad_pages(self, soup):
1612+ if 'Advertisement' in soup.title:
1613+ nexturl=soup.find('a')['href']
1614+ return self.index_to_soup(nexturl, raw=True)
1615+
1616+ def get_cover_url(self):
1617+ soup = self.index_to_soup('http://www.prasa24.pl/gazeta/kurier-lubelski/')
1618+ self.cover_url=soup.find(id='pojemnik').img['src']
1619+ return getattr(self, 'cover_url', self.cover_url)
1620
1621=== added file 'recipes/kurier_poranny.recipe'
1622--- recipes/kurier_poranny.recipe 1970-01-01 00:00:00 +0000
1623+++ recipes/kurier_poranny.recipe 2013-03-06 19:41:20 +0000
1624@@ -0,0 +1,78 @@
1625+import re
1626+from calibre.web.feeds.news import BasicNewsRecipe
1627+
1628+class KurierPoranny(BasicNewsRecipe):
1629+ title = u'Kurier Poranny'
1630+ __author__ = 'fenuks'
1631+ description = u'Kurier Poranny | poranny.pl - portal miejski Białegostoku,informacje,wydarzenia'
1632+ category = 'newspaper'
1633+ language = 'pl'
1634+ encoding = 'iso-8859-2'
1635+ extra_css = 'ul {list-style: none; padding:0; margin:0;}'
1636+ INDEX = 'http://www.poranny.pl'
1637+ masthead_url = INDEX + '/images/top_logo.png'
1638+ oldest_article = 7
1639+ max_articles_per_feed = 100
1640+ remove_empty_feeds = True
1641+ no_stylesheets = True
1642+ ignore_duplicate_articles = {'title', 'url'}
1643+
1644+ preprocess_regexps = [(re.compile(ur'Czytaj:.*?</a>', re.DOTALL), lambda match: ''), (re.compile(ur'Przeczytaj także:.*?</a>', re.DOTALL|re.IGNORECASE), lambda match: ''),
1645+ (re.compile(ur'Przeczytaj również:.*?</a>', re.DOTALL|re.IGNORECASE), lambda match: ''), (re.compile(ur'Zobacz też:.*?</a>', re.DOTALL|re.IGNORECASE), lambda match: '')]
1646+
1647+ keep_only_tags = [dict(id=['article', 'cover', 'photostory'])]
1648+ remove_tags = [dict(id=['articleTags', 'articleMeta', 'boxReadIt', 'articleGalleries', 'articleConnections',
1649+ 'ForumArticleComments', 'articleRecommend', 'jedynkiLinks', 'articleGalleryConnections',
1650+ 'photostoryConnections', 'articleEpaper', 'articlePoll', 'articleAlarm', 'articleByline']),
1651+ dict(attrs={'class':'articleFunctions'})]
1652+
1653+
1654+ feeds = [(u'Wszystkie', u'http://www.poranny.pl/rss.xml'),
1655+ (u'Białystok', u'http://www.poranny.pl/bialystok.xml'),
1656+ (u'Bielsk Podlaski', u'http://www.poranny.pl/bielskpodlaski.xml'),
1657+ (u'Czarna Białostocka', u'http://www.poranny.pl/czarnabialostocka.xml'),
1658+ (u'Hajnówka', u'http://www.poranny.pl/hajnowka.xml'),
1659+ (u'Łapy', u'http://www.poranny.pl/lapy.xml'),
1660+ (u'Sokółka', u'http://www.poranny.pl/sokolka.xml'),
1661+ (u'Supraśl', u'http://www.poranny.pl/suprasl.xml'),
1662+ (u'Wasilków', u'http://www.poranny.pl/wasilkow.xml'),
1663+ (u'Sport', u'http://www.poranny.pl/sport.xml'),
1664+ (u'Praca', u'http://www.poranny.pl/praca.xml'),
1665+ (u'Kultura', u'http://www.poranny.pl/kultura.xml'),
1666+ (u'Dom', u'http://www.poranny.pl/dom.xml'),
1667+ (u'Auto', u'http://www.poranny.pl/auto.xml'),
1668+ (u'Polityka', u'http://www.poranny.pl/polityka.xml')]
1669+
1670+ def get_cover_url(self):
1671+ soup = self.index_to_soup(self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI')
1672+ nexturl = self.INDEX + soup.find(id='covers').find('a')['href']
1673+ soup = self.index_to_soup(nexturl)
1674+ self.cover_url = self.INDEX + soup.find(id='cover').find(name='img')['src']
1675+ return getattr(self, 'cover_url', self.cover_url)
1676+
1677+ def append_page(self, soup, appendtag):  # fetch pages 2..number of a paginated photo story and append their parts to appendtag
1678+ tag = soup.find('span', attrs={'class':'photoNavigationPages'})
1679+ if tag:  # only pages that carry the pagination marker are processed
1680+ number = int(tag.string.rpartition('/')[-1].replace('&nbsp;', ''))  # text after the last '/' is parsed as the page count
1681+ baseurl = self.INDEX + soup.find(attrs={'class':'photoNavigationNext'})['href'][:-1]  # drops the final character of the href; str(nr) is appended below (presumably the href ends in a one-digit page number -- TODO confirm)
1682+
1683+ for r in appendtag.findAll(attrs={'class':'photoNavigation'}):
1684+ r.extract()  # remove navigation elements from the output
1685+ for nr in range(2, number+1):
1686+ soup2 = self.index_to_soup(baseurl + str(nr))
1687+ pagetext = soup2.find(id='photoContainer')  # part 1 of 3 copied from each page
1688+ if pagetext:
1689+ pos = len(appendtag.contents)  # insert at the end of appendtag
1690+ appendtag.insert(pos, pagetext)
1691+ pagetext = soup2.find(attrs={'class':'photoMeta'})  # part 2 of 3
1692+ if pagetext:
1693+ pos = len(appendtag.contents)
1694+ appendtag.insert(pos, pagetext)
1695+ pagetext = soup2.find(attrs={'class':'photoStoryText'})  # part 3 of 3
1696+ if pagetext:
1697+ pos = len(appendtag.contents)
1698+ appendtag.insert(pos, pagetext)
1699+
1700+ def preprocess_html(self, soup):  # run append_page on the document body, then hand the same soup back
1701+ self.append_page(soup, soup.body)
1702+ return soup
1703
1704=== added file 'recipes/kurier_szczecinski.recipe'
1705--- recipes/kurier_szczecinski.recipe 1970-01-01 00:00:00 +0000
1706+++ recipes/kurier_szczecinski.recipe 2013-03-06 19:41:20 +0000
1707@@ -0,0 +1,27 @@
1708+from calibre.web.feeds.news import BasicNewsRecipe
1709+
1710+ class KurierSzczecinski(BasicNewsRecipe):  # feed-based recipe for www.24kurier.pl; attributes only, no method overrides
1711+ title = u'Kurier Szczeci\u0144ski'
1712+ __author__ = 'fenuks'
1713+ description = u'24Kurier jest portalem Kuriera Szczecińskiego. Zawiera aktualności ze Szczecina oraz wiadomości regionalne z województwa zachodniopomorskiego. '
1714+ category = 'newspaper'
1715+ #publication_type = ''
1716+ language = 'pl'
1717+ #encoding = ''
1718+ #extra_css = ''
1719+ cover_url = 'http://www.24kurier.pl/Administracja/Img/24kurier_logo-copy-po-zapis'
1720+ #masthead_url = ''
1721+ use_embedded_content = False
1722+ oldest_article = 7
1723+ max_articles_per_feed = 100
1724+ no_stylesheets = True
1725+ remove_empty_feeds = True
1726+ remove_javascript = True
1727+ remove_attributes = ['style', 'font']
1728+ ignore_duplicate_articles = {'title', 'url'}
1729+
1730+ keep_only_tags = [dict(attrs={'class':'section'})]  # selector: elements with class 'section'
1731+ remove_tags = [dict(attrs={'class':['Ikonki', 'rek', 'artComments']})]  # selector: elements with any of these classes
1732+ remove_tags_after = dict(attrs={'class':'artComments'})  # NOTE(review): 'artComments' is also listed in remove_tags above
1733+ #remove_tags_before = dict()
1734+ feeds = [(u'Aktualno\u015bci', u'http://www.24kurier.pl/cmspages/articles_rss.aspx'), (u'Kraj', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=kraj'), (u'\u015awiat', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=swiat'), (u'Sport', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=sport'), (u'Kultura', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=kultura'), (u'Gospodarka', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=gospodarka'), (u'Nauka', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=nauka'), (u'Region', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=region'), (u'Szczecin', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=szczecin'), (u'Bia\u0142ogard', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=bialogard'), (u'Choszczno', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=choszczno'), (u'Drawsko', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=drawsko'), (u'Goleni\xf3w', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=goleniow'), (u'Gryfice', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=gryfice'), (u'Gryfino', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=gryfino'), (u'Kamie\u0144 Pomorski', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=kamien'), (u'Ko\u0142obrzeg', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=kolobrzeg'), (u'Koszalin', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=koszalin'), (u'\u0141obez', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=lobez'), (u'My\u015blib\xf3rz', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=mysliborz'), (u'Police', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=police'), (u'Pyrzyce', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=pyrzyce'), (u'S\u0142awno', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=slawno'), (u'Stargard', 
u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=stargard'), (u'Szczecinek', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=szczecinek'), (u'\u015awidwin', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=swidwin'), (u'\u015awinouj\u015bcie', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=swinoujscie'), (u'Wa\u0142cz', u'http://www.24kurier.pl/cmspages/articles_rss.aspx?dzial=walcz')]
1735
1736=== modified file 'recipes/lomza.recipe'
1737--- recipes/lomza.recipe 2012-10-17 14:12:08 +0000
1738+++ recipes/lomza.recipe 2013-03-06 19:41:20 +0000
1739@@ -3,7 +3,7 @@
1740 class Lomza(BasicNewsRecipe):
1741 title = u'4Lomza'
1742 __author__ = 'fenuks'
1743- description = u'4Łomża - regional site'
1744+ description = u'Regionalny portal. Najświeższe informacje z regionu, kulturalne, sportowe. Ogłoszenia, baza biznesu, forum.'
1745 cover_url = 'http://www.4lomza.pl/i/logo4lomza_m.jpg'
1746 language = 'pl'
1747 oldest_article = 15
1748
1749=== modified file 'recipes/mlody_technik_pl.recipe'
1750--- recipes/mlody_technik_pl.recipe 2013-02-16 15:44:46 +0000
1751+++ recipes/mlody_technik_pl.recipe 2013-03-06 19:41:20 +0000
1752@@ -7,7 +7,7 @@
1753 description = u'Młody technik'
1754 category = 'science'
1755 language = 'pl'
1756- cover_url='http://science-everywhere.pl/wp-content/uploads/2011/10/mt12.jpg'
1757+ #cover_url = 'http://science-everywhere.pl/wp-content/uploads/2011/10/mt12.jpg'
1758 no_stylesheets = True
1759 preprocess_regexps = [(re.compile(r"<h4>Podobne</h4>", re.IGNORECASE), lambda m: '')]
1760 oldest_article = 7
1761@@ -18,10 +18,17 @@
1762 remove_tags = [dict(attrs={'class':'st-related-posts'})]
1763 remove_tags_after = dict(attrs={'class':'entry-content clearfix'})
1764 feeds = [(u'Wszystko', u'http://www.mt.com.pl/feed'),
1765- (u'MT NEWS 24/7', u'http://www.mt.com.pl/kategoria/mt-newsy-24-7/feed'),
1766+ #(u'MT NEWS 24/7', u'http://www.mt.com.pl/kategoria/mt-newsy-24-7/feed'),
1767 (u'Info zoom', u'http://www.mt.com.pl/kategoria/info-zoom/feed'),
1768 (u'm.technik', u'http://www.mt.com.pl/kategoria/m-technik/feed'),
1769 (u'Szkoła', u'http://www.mt.com.pl/kategoria/szkola-2/feed'),
1770 (u'Na Warsztacie', u'http://www.mt.com.pl/kategoria/na-warsztacie/feed'),
1771 (u'Z pasji do...', u'http://www.mt.com.pl/kategoria/z-pasji-do/feed'),
1772 (u'MT testuje', u'http://www.mt.com.pl/kategoria/mt-testuje/feed')]
1773+
1774+ def get_cover_url(self):
1775+ soup = self.index_to_soup('http://www.mt.com.pl/')
1776+ tag = soup.find(attrs={'class':'xoxo'})
1777+ if tag:
1778+ self.cover_url = tag.find('img')['src']
1779+ return getattr(self, 'cover_url', self.cover_url)
1780
1781=== modified file 'recipes/niebezpiecznik.recipe'
1782--- recipes/niebezpiecznik.recipe 2011-08-20 17:45:04 +0000
1783+++ recipes/niebezpiecznik.recipe 2013-03-06 19:41:20 +0000
1784@@ -9,8 +9,8 @@
1785 oldest_article = 8
1786 max_articles_per_feed = 100
1787 no_stylesheets = True
1788- cover_url =u'http://userlogos.org/files/logos/Karmody/niebezpiecznik_01.png'
1789- remove_tags=[dict(name='div', attrs={'class':['sociable']}), dict(name='h4'), dict(attrs={'class':'similar-posts'})]
1790- keep_only_tags= [dict(name='div', attrs={'class':['title', 'entry']})]
1791+ cover_url = u'http://userlogos.org/files/logos/Karmody/niebezpiecznik_01.png'
1792+ remove_tags = [dict(name='div', attrs={'class':['sociable']}), dict(name='h4'), dict(attrs={'class':'similar-posts'})]
1793+ keep_only_tags = [dict(name='div', attrs={'class':['title', 'entry']})]
1794 feeds = [(u'Wiadomości', u'http://feeds.feedburner.com/niebezpiecznik/'),
1795 ('Blog', 'http://feeds.feedburner.com/niebezpiecznik/linkblog/')]
1796
1797=== modified file 'recipes/nowa_fantastyka.recipe'
1798--- recipes/nowa_fantastyka.recipe 2013-01-25 12:49:36 +0000
1799+++ recipes/nowa_fantastyka.recipe 2013-03-06 19:41:20 +0000
1800@@ -9,7 +9,7 @@
1801 __modified_by__ = 'zaslav'
1802 language = 'pl'
1803 encoding='latin2'
1804- description ='site for fantasy readers'
1805+ description = u'Strona dla miłośników fantastyki'
1806 category='fantasy'
1807 masthead_url='http://farm5.static.flickr.com/4133/4956658792_7ba7fbf562.jpg'
1808 #extra_css='.tytul {font-size: 20px;}' #not working
1809
1810=== added file 'recipes/nto.recipe'
1811--- recipes/nto.recipe 1970-01-01 00:00:00 +0000
1812+++ recipes/nto.recipe 2013-03-06 19:41:20 +0000
1813@@ -0,0 +1,63 @@
1814+import re
1815+from calibre.web.feeds.news import BasicNewsRecipe
1816+
1817+class NTO(BasicNewsRecipe):
1818+ title = u'Nowa Trybuna Opolska'
1819+ __author__ = 'fenuks'
1820+ description = u'Nowa Trybuna Opolska - portal regionalny województwa opolskiego.'
1821+ category = 'newspaper'
1822+ language = 'pl'
1823+ encoding = 'iso-8859-2'
1824+ extra_css = 'ul {list-style: none; padding:0; margin:0;}'
1825+ INDEX = 'http://www.nto.pl'
1826+ masthead_url = INDEX + '/images/top_logo.png'
1827+ oldest_article = 7
1828+ max_articles_per_feed = 100
1829+ remove_empty_feeds = True
1830+ no_stylesheets = True
1831+ ignore_duplicate_articles = {'title', 'url'}
1832+
1833+ preprocess_regexps = [(re.compile(ur'Czytaj:.*?</a>', re.DOTALL), lambda match: ''), (re.compile(ur'Przeczytaj także:.*?</a>', re.DOTALL|re.IGNORECASE), lambda match: ''),
1834+ (re.compile(ur'Przeczytaj również:.*?</a>', re.DOTALL|re.IGNORECASE), lambda match: ''), (re.compile(ur'Zobacz też:.*?</a>', re.DOTALL|re.IGNORECASE), lambda match: '')]
1835+
1836+ keep_only_tags = [dict(id=['article', 'cover', 'photostory'])]
1837+ remove_tags = [dict(id=['articleTags', 'articleMeta', 'boxReadIt', 'articleGalleries', 'articleConnections',
1838+ 'ForumArticleComments', 'articleRecommend', 'jedynkiLinks', 'articleGalleryConnections',
1839+ 'photostoryConnections', 'articleEpaper', 'articlePoll', 'articleAlarm', 'articleByline']),
1840+ dict(attrs={'class':'articleFunctions'})]
1841+
1842+ feeds = [(u'Wszystkie', u'http://www.nto.pl/rss.xml'), (u'Region', u'http://www.nto.pl/region.xml'), (u'Brzeg', u'http://www.nto.pl/brzeg.xml'), (u'G\u0142ubczyce', u'http://www.nto.pl/glubczyce.xml'), (u'K\u0119dzierzyn-Ko\u017ale', u'http://www.nto.pl/kedzierzynkozle.xml'), (u'Kluczbork', u'http://www.nto.pl/kluczbork.xml'), (u'Krapkowice', u'http://www.nto.pl/krapkowice.xml'), (u'Namys\u0142\xf3w', u'http://www.nto.pl/namyslow.xml'), (u'Nysa', u'http://www.nto.pl/nysa.xml'), (u'Olesno', u'http://www.nto.pl/olesno.xml'), (u'Opole', u'http://www.nto.pl/opole.xml'), (u'Prudnik', u'http://www.nto.pl/prudnik.xml'), (u'Strzelce Opolskie', u'http://www.nto.pl/strzelceopolskie.xml'), (u'Sport', u'http://www.nto.pl/sport.xml'), (u'Polska i \u015bwiat', u'http://www.nto.pl/apps/pbcs.dll/section?Category=RSS&channel=KRAJSWIAT'), (u'Zdrowy styl', u'http://www.nto.pl/apps/pbcs.dll/section?Category=rss_zdrowystyl'), (u'Reporta\u017c', u'http://www.nto.pl/reportaz.xml'), (u'Studia', u'http://www.nto.pl/akademicka.xml')]
1843+
1844+ def get_cover_url(self):
1845+ soup = self.index_to_soup(self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI')
1846+ nexturl = self.INDEX + soup.find(id='covers').find('a')['href']
1847+ soup = self.index_to_soup(nexturl)
1848+ self.cover_url = self.INDEX + soup.find(id='cover').find(name='img')['src']
1849+ return getattr(self, 'cover_url', self.cover_url)
1850+
1851+ def append_page(self, soup, appendtag):
1852+ tag = soup.find('span', attrs={'class':'photoNavigationPages'})
1853+ if tag:
1854+ number = int(tag.string.rpartition('/')[-1].replace('&nbsp;', ''))
1855+ baseurl = self.INDEX + soup.find(attrs={'class':'photoNavigationNext'})['href'][:-1]
1856+
1857+ for r in appendtag.findAll(attrs={'class':'photoNavigation'}):
1858+ r.extract()
1859+ for nr in range(2, number+1):
1860+ soup2 = self.index_to_soup(baseurl + str(nr))
1861+ pagetext = soup2.find(id='photoContainer')
1862+ if pagetext:
1863+ pos = len(appendtag.contents)
1864+ appendtag.insert(pos, pagetext)
1865+ pagetext = soup2.find(attrs={'class':'photoMeta'})
1866+ if pagetext:
1867+ pos = len(appendtag.contents)
1868+ appendtag.insert(pos, pagetext)
1869+ pagetext = soup2.find(attrs={'class':'photoStoryText'})
1870+ if pagetext:
1871+ pos = len(appendtag.contents)
1872+ appendtag.insert(pos, pagetext)
1873+
1874+ def preprocess_html(self, soup):
1875+ self.append_page(soup, soup.body)
1876+ return soup
1877
1878=== modified file 'recipes/pc_foster.recipe'
1879--- recipes/pc_foster.recipe 2012-02-20 05:41:32 +0000
1880+++ recipes/pc_foster.recipe 2013-03-06 19:41:20 +0000
1881@@ -7,12 +7,12 @@
1882 description = u'Vortal technologiczny: testy, recenzje sprzętu komputerowego i telefonów, nowinki hardware, programy i gry dla Windows. Podkręcanie, modding i Overclocking.'
1883 category = 'IT'
1884 language = 'pl'
1885- masthead_url='http://pcfoster.pl/public/images/logo.png'
1886- cover_url= 'http://pcfoster.pl/public/images/logo.png'
1887- no_stylesheets= True
1888- remove_empty_feeds= True
1889- keep_only_tags= [dict(id=['news_details', 'review_details']), dict(attrs={'class':'pager more_top'})]
1890- remove_tags=[dict(name='p', attrs={'class':'right'})]
1891+ masthead_url = 'http://pcfoster.pl/public/images/logo.png'
1892+ cover_url = 'http://pcfoster.pl/public/images/logo.png'
1893+ no_stylesheets = True
1894+ remove_empty_feeds = True
1895+ keep_only_tags = [dict(id=['news_details', 'review_details']), dict(attrs={'class':'pager more_top'})]
1896+ remove_tags = [dict(name='p', attrs={'class':'right'})]
1897 feeds = [(u'G\u0142\xf3wny', u'http://pcfoster.pl/public/rss/main.xml')]
1898
1899
1900@@ -32,4 +32,4 @@
1901
1902 def preprocess_html(self, soup):
1903 self.append_page(soup, soup.body)
1904- return soup
1905\ No newline at end of file
1906+ return soup
1907
1908=== modified file 'recipes/polska_times.recipe'
1909--- recipes/polska_times.recipe 2012-10-17 14:12:08 +0000
1910+++ recipes/polska_times.recipe 2013-03-06 19:41:20 +0000
1911@@ -7,9 +7,11 @@
1912 language = 'pl'
1913 masthead_url = 'http://s.polskatimes.pl/g/logo_naglowek/polska.gif?17'
1914 oldest_article = 7
1915+ encoding = 'iso-8859-2'
1916 max_articles_per_feed = 100
1917- remove_emty_feeds= True
1918+ remove_empty_feeds = True
1919 no_stylesheets = True
1920+ use_embedded_content = False
1921 ignore_duplicate_articles = {'title', 'url'}
1922 #preprocess_regexps = [(re.compile(ur'<b>Czytaj także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur',<b>Czytaj też:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>Zobacz także:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<center><h4><a.*?</a></h4></center>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TEŻ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ WIĘCEJ:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>CZYTAJ TAKŻE:.*?</b>', re.DOTALL), lambda match: ''), (re.compile(ur'<b>\* CZYTAJ KONIECZNIE:.*', re.DOTALL), lambda match: '</body>'), (re.compile(ur'<b>Nasze serwisy:</b>.*', re.DOTALL), lambda match: '</body>') ]
1923 remove_tags_after= dict(attrs={'src':'http://nm.dz.com.pl/dz.png'})
1924
1925=== modified file 'recipes/spiders_web_pl.recipe'
1926--- recipes/spiders_web_pl.recipe 2013-03-05 20:11:58 +0000
1927+++ recipes/spiders_web_pl.recipe 2013-03-06 19:41:20 +0000
1928@@ -4,7 +4,7 @@
1929 title = u"Spider's Web"
1930 oldest_article = 7
1931 __author__ = 'fenuks'
1932- description = u'Opinie i analizy na temat technologii'
1933+ description = u'Autorskie teksty popularnych blogerów, testy sprzętu i aplikacji, oraz wiele więcej.'
1934 cover_url = 'http://www.spidersweb.pl/wp-content/themes/new_sw/images/spidersweb.png'
1935 category = 'IT, WEB'
1936 language = 'pl'
1937
1938=== modified file 'recipes/tablety_pl.recipe'
1939--- recipes/tablety_pl.recipe 2012-03-21 03:22:11 +0000
1940+++ recipes/tablety_pl.recipe 2013-03-06 19:41:20 +0000
1941@@ -3,7 +3,7 @@
1942 class Tablety_pl(BasicNewsRecipe):
1943 title = u'Tablety.pl'
1944 __author__ = 'fenuks'
1945- description = u'tablety.pl - latest tablet news'
1946+ description = u'Tablety, gry i aplikacje na tablety.'
1947 masthead_url= 'http://www.tablety.pl/wp-content/themes/kolektyw/img/logo.png'
1948 cover_url = 'http://www.tablety.pl/wp-content/themes/kolektyw/img/logo.png'
1949 category = 'IT'
1950
1951=== modified file 'recipes/tanuki.recipe'
1952--- recipes/tanuki.recipe 2012-04-18 04:03:44 +0000
1953+++ recipes/tanuki.recipe 2013-03-06 19:41:20 +0000
1954@@ -4,6 +4,7 @@
1955 title = u'Tanuki'
1956 oldest_article = 7
1957 __author__ = 'fenuks'
1958+ description = u'Tanuki - portal o anime i mandze.'
1959 category = 'anime, manga'
1960 language = 'pl'
1961 max_articles_per_feed = 100
1962@@ -42,4 +43,4 @@
1963 a['href']='http://manga.tanuki.pl' + a['href']
1964 elif 'tanuki-czytelnia' in soup.title.string.lower():
1965 a['href']='http://czytelnia.tanuki.pl' + a['href']
1966- return soup
1967\ No newline at end of file
1968+ return soup
1969
1970=== added file 'recipes/trojmiasto_pl.recipe'
1971--- recipes/trojmiasto_pl.recipe 1970-01-01 00:00:00 +0000
1972+++ recipes/trojmiasto_pl.recipe 2013-03-06 19:41:20 +0000
1973@@ -0,0 +1,37 @@
1974+import re
1975+from calibre.web.feeds.news import BasicNewsRecipe
1976+
1977+class Trojmiasto(BasicNewsRecipe):
1978+ title = u'Tr\xf3jmiasto.pl'
1979+ __author__ = 'fenuks'
1980+ description = u'Wiadomości, imprezy, wydarzenia, spektakle.Gdańsk, Gdynia, Sopot - NOCLEGI, Katalog firm, repertuar kin, wydarzenia, przewodnik, mapa, kwatery, hotele. Portal regionalny trojmiasto.pl'
1981+ category = ''
1982+ #publication_type = ''
1983+ language = 'pl'
1984+ encoding = 'iso-8859-2'
1985+ extra_css = 'ul {list-style: none; padding:0; margin:0;}'
1986+ cover_url = 'http://www.trojmiasto.pl/_img/toplong2/logo_trojmiasto.gif'
1987+ #masthead_url = ''
1988+ use_embedded_content = False
1989+ oldest_article = 7
1990+ max_articles_per_feed = 100
1991+ no_stylesheets = True
1992+ remove_empty_feeds = True
1993+ remove_javascript = True
1994+ remove_attributes = ['style', 'font']
1995+ ignore_duplicate_articles = {'title', 'url'}
1996+
1997+ preprocess_regexps = [(re.compile(ur'<strong>Czytaj więcej.*?</a>', re.DOTALL|re.IGNORECASE), lambda match: ''), (re.compile(ur'<strong>Zobacz też.*?</a>', re.DOTALL|re.IGNORECASE), lambda match: ''),
1998+ (re.compile(ur'<b>[A-ZĄĆĘŁŃÓŚŹŻ \-,.:]*?</b>', re.DOTALL), lambda match: ''),]
1999+
2000+ #keep_only_tags = []
2001+ remove_tags = [dict(id=['logo', 'font_small', 'font_big']), dict(attrs={'class':['title-long', 'ankieta', 'newsletter-inside-content newsletter-wrap', 'copyright_box',
2002+ 'logo', 'btn btn-photo-add', 'related-info-wrap', 'nTabs', 'article-list', 'rate-player horizontal', 'type-box', 'rate-player'
2003+ 'hover-nav', 'live-head tC', 'prev-link', 'next-link', 'ie6']}), dict(attrs={'title':[u'drukuj artykuł', u'podziel się na Facebooku', u'prześlij artykuł']})]
2004+ remove_tags_after = dict(attrs={'class':'author-wrap'})
2005+ remove_tags_before = dict(attrs={'class':'text-container'})
2006+
2007+ feeds = [(u'Wszystkie', u'http://rss.trojmiasto.pl/rss,0.xml'), (u'Fakty i opinie', u'http://rss.trojmiasto.pl/rss,1.xml'), (u'Sport', u'http://rss.trojmiasto.pl/rss,2.xml'), (u'Dom', u'http://rss.trojmiasto.pl/rss,3.xml'), (u'Moto', u'http://rss.trojmiasto.pl/rss,4.xml'), (u'Nauka', u'http://rss.trojmiasto.pl/rss,5.xml'), (u'Rozrywka', u'http://rss.trojmiasto.pl/rss,6.xml'), (u'Kultura', u'http://rss.trojmiasto.pl/rss,7.xml'), (u'Rowery', u'http://rss.trojmiasto.pl/rss,8.xml'), (u'Dziecko', u'http://rss.trojmiasto.pl/rss,9.xml'), (u'Zdrowie i uroda', u'http://rss.trojmiasto.pl/rss,10.xml'), (u'Praca', u'http://rss.trojmiasto.pl/rss,11.xml'), (u'Artyku\u0142y czytelnik\xf3w', u'http://rss.trojmiasto.pl/rss,12.xml'), (u'Korki', u'http://rss.trojmiasto.pl/rss,13.xml'), (u'Historia', u'http://rss.trojmiasto.pl/rss,14.xml'), (u'Biznes', u'http://rss.trojmiasto.pl/rss,16.xml'), (u'Kryminalne Tr\xf3jmiasto', u'http://rss.trojmiasto.pl/rss,17.xml'), (u'Przewodnik', u'http://rss.trojmiasto.pl/rss,18.xml'), (u'Aktywne Tr\xf3jmiasto', u'http://rss.trojmiasto.pl/rss,19.xml'), (u'Delux', u'http://rss.trojmiasto.pl/rss,20.xml')]
2008+
2009+ def print_version(self, url):
2010+ return url + '?print=1'
2011
2012=== modified file 'recipes/tvn24.recipe'
2013--- recipes/tvn24.recipe 2013-03-04 03:41:11 +0000
2014+++ recipes/tvn24.recipe 2013-03-06 19:41:20 +0000
2015@@ -8,8 +8,8 @@
2016 description = u'Sport, Biznes, Gospodarka, Informacje, Wiadomości Zawsze aktualne wiadomości z Polski i ze świata'
2017 category = 'news'
2018 language = 'pl'
2019- masthead_url= 'http://www.tvn24.pl/_d/topmenu/logo2.gif'
2020- cover_url= 'http://www.tvn24.pl/_d/topmenu/logo2.gif'
2021+ #masthead_url= 'http://www.tvn24.pl/_d/topmenu/logo2.gif'
2022+ cover_url= 'http://www.qzdrowiu.pl/Upload/KnowQZdrowiu_PressOffice/TVN24_logo_575702b7-edce-4b6f-a41b-4395f9456f96_ff6d6ccf-528a-4b94-9e61-2fed727aba35.png'
2023 extra_css= 'ul {list-style: none; padding: 0; margin: 0;} li {float: left;margin: 0 0.15em;}'
2024 remove_empty_feeds = True
2025 remove_javascript = True
2026
2027=== modified file 'recipes/ubuntu_pl.recipe'
2028--- recipes/ubuntu_pl.recipe 2012-02-20 04:34:40 +0000
2029+++ recipes/ubuntu_pl.recipe 2013-03-06 19:41:20 +0000
2030@@ -3,7 +3,7 @@
2031 class Ubuntu_pl(BasicNewsRecipe):
2032 title = u'UBUNTU.pl'
2033 __author__ = 'fenuks'
2034- description = 'UBUNTU.pl - polish ubuntu community site'
2035+ description = 'Polskie forum użytkowników Ubuntu Linux. Projekty, porady i dyskusje, gotowe rozwiązania problemów.'
2036 masthead_url= 'http://ubuntu.pl/img/logo.jpg'
2037 cover_url = 'http://ubuntu.pl/img/logo.jpg'
2038 category = 'linux, IT'
2039
2040=== added file 'recipes/zycie_warszawy.recipe'
2041--- recipes/zycie_warszawy.recipe 1970-01-01 00:00:00 +0000
2042+++ recipes/zycie_warszawy.recipe 2013-03-06 19:41:20 +0000
2043@@ -0,0 +1,46 @@
2044+#!/usr/bin/env python
2045+# -*- coding: utf-8 -*-
2046+
2047+__license__ = 'GPL v3'
2048+__copyright__ = u'Łukasz Grąbczewski 2012-2013'
2049+__version__ = '1.1'
2050+
2051+'''
2052+zw.com.pl
2053+'''
2054+
2055+from calibre.web.feeds.news import BasicNewsRecipe
2056+
2057+class zyciewarszawy(BasicNewsRecipe):
2058+ __author__ = u'Łukasz Grączewski'
2059+ title = u'Życie Warszawy'
2060+ description = u'Wiadomości z Warszawy'
2061+ language = 'pl'
2062+ publisher = 'Presspublica'
2063+ publication_type = 'newspapper'
2064+ masthead_url = 'http://www.zw.com.pl/static/img/logo_zw.gif'
2065+ no_stylesheets = True
2066+ remove_javascript = True
2067+
2068+ oldest_article = 1 #daily news only
2069+ max_articles_per_feed = 100
2070+
2071+ feeds = [(u'Najnowsze', u'http://www.zw.com.pl/rss/1.html')]
2072+
2073+ keep_only_tags = []
2074+ keep_only_tags.append(dict(name = 'div', attrs = {'id' : 'storyp'}))
2075+
2076+ remove_tags = []
2077+ remove_tags.append(dict(name = 'div', attrs = {'class' : 'authordate'}))
2078+ remove_tags.append(dict(name = 'div', attrs = {'class' : 'author'}))
2079+ '''remove_tags.append(dict(name = 'div', attrs = {'class' : 'seealso'}))'''
2080+ remove_tags.append(dict(name = 'div', attrs = {'class' : 'more'}))
2081+ remove_tags.append(dict(name = 'div', attrs = {'class' : 'clr'}))
2082+ remove_tags.append(dict(name = 'div', attrs = {'id' : 'adk_0'}))
2083+ remove_tags.append(dict(name = 'div', attrs = {'id' : 'adsense_0'}))
2084+ remove_tags.append(dict(name = 'div', attrs = {'id' : 'share_bottom'}))
2085+ remove_tags.append(dict(name = 'div', attrs = {'id' : 'copyright_law'}))
2086+
2087+ def print_version(self, url):
2088+ url += "?print=tak"
2089+ return url

Subscribers

People subscribed via source and target branches