diff --git a/recipes/android_com_pl.recipe b/recipes/android_com_pl.recipe
index a4a387d414..4b5455cfc6 100644
--- a/recipes/android_com_pl.recipe
+++ b/recipes/android_com_pl.recipe
@@ -1,3 +1,4 @@
+import re
from calibre.web.feeds.news import BasicNewsRecipe
class Android_com_pl(BasicNewsRecipe):
@@ -6,8 +7,9 @@ class Android_com_pl(BasicNewsRecipe):
description = u'Android.com.pl - to największe w Polsce centrum Android OS. Znajdziesz tu: nowości, forum, pomoc, recenzje, gry, aplikacje.'
category = 'Android, mobile'
language = 'pl'
- use_embedded_content=True
- cover_url =u'http://android.com.pl/wp-content/themes/android/images/logo.png'
+ use_embedded_content = True
+ cover_url = 'http://android.com.pl/wp-content/themes/android/images/logo.png'
oldest_article = 8
max_articles_per_feed = 100
- feeds = [(u'Android', u'http://android.com.pl/feed/')]
+ preprocess_regexps = [(re.compile(ur'
.{,1}
', re.DOTALL), lambda match: '')]
+ feeds = [(u'Android', u'http://android.com.pl/feed/')]
\ No newline at end of file
diff --git a/recipes/astro_news_pl.recipe b/recipes/astro_news_pl.recipe
index b7a15a9809..5189154f3b 100644
--- a/recipes/astro_news_pl.recipe
+++ b/recipes/astro_news_pl.recipe
@@ -10,13 +10,9 @@ class AstroNEWS(BasicNewsRecipe):
#extra_css= 'table {text-align: left;}'
no_stylesheets=True
cover_url='http://news.astronet.pl/img/logo_news.jpg'
+ remove_attributes = ['width', 'align']
remove_tags=[dict(name='hr')]
feeds = [(u'Wiadomości', u'http://news.astronet.pl/rss.cgi')]
def print_version(self, url):
return url.replace('astronet.pl/', 'astronet.pl/print.cgi?')
-
- def preprocess_html(self, soup):
- for item in soup.findAll(align=True):
- del item['align']
- return soup
diff --git a/recipes/badania_net.recipe b/recipes/badania_net.recipe
index c47e9b6f54..3ccf6be88c 100644
--- a/recipes/badania_net.recipe
+++ b/recipes/badania_net.recipe
@@ -14,7 +14,6 @@ class BadaniaNet(BasicNewsRecipe):
preprocess_regexps = [(re.compile(r"Tekst sponsoruje
", re.IGNORECASE), lambda m: ''),]
remove_empty_feeds = True
use_embedded_content = False
- remove_tags = [dict(attrs={'class':['omc-flex-category', 'omc-comment-count', 'omc-single-tags']})]
- remove_tags_after = dict(attrs={'class':'omc-single-tags'})
- keep_only_tags = [dict(id='omc-full-article')]
+ remove_tags = []
+ keep_only_tags = [dict(name='article')]
feeds = [(u'Psychologia', u'http://badania.net/category/psychologia/feed/'), (u'Technologie', u'http://badania.net/category/technologie/feed/'), (u'Biologia', u'http://badania.net/category/biologia/feed/'), (u'Chemia', u'http://badania.net/category/chemia/feed/'), (u'Zdrowie', u'http://badania.net/category/zdrowie/'), (u'Seks', u'http://badania.net/category/psychologia-ewolucyjna-tematyka-seks/feed/')]
\ No newline at end of file
diff --git a/recipes/cdrinfo_pl.recipe b/recipes/cdrinfo_pl.recipe
index 2e75eee622..db5fd386f9 100644
--- a/recipes/cdrinfo_pl.recipe
+++ b/recipes/cdrinfo_pl.recipe
@@ -19,14 +19,16 @@ class cdrinfo(BasicNewsRecipe):
no_stylesheets = True
remove_empty_feeds = True
remove_javascript = True
- remove_attributes = ['style']
- preprocess_regexps = [(re.compile(u']*?>Uprzejmie prosimy o przestrzeganie netykiety.+?www\.gravatar\.com\.
', re.DOTALL), lambda match: '')]
+ remove_attributes = ['style', 'onmouseover']
+ preprocess_regexps = [(re.compile(u']*?>Uprzejmie prosimy o przestrzeganie netykiety.+?www\.gravatar\.com\.
', re.DOTALL), lambda match: ''),
+ (re.compile(u']*?>.{,2}
', re.DOTALL), lambda match: '')]
ignore_duplicate_articles = {'title', 'url'}
keep_only_tags = [dict(name='input', attrs={'name':'ref'}), dict(id=['text', 'text2'])]
- remove_tags = [dict(attrs={'class':['navigation', 'sociable', 'last6news']}), dict(name='hr'), dict(id='respond')]
+ remove_tags = [dict(attrs={'class':['navigation', 'sociable', 'last6news']}), dict(name=['hr', 'br']), dict(id='respond')]
remove_tags_after = dict(id='artnawigacja')
- feeds = [(u'Wiadomości', 'http://feeds.feedburner.com/cdrinfo'), (u'Recenzje', 'http://www.cdrinfo.pl/rss/rss_recenzje.php'),
+ feeds = [(u'Wiadomości', 'http://feeds.feedburner.com/cdrinfo'),
+ (u'Recenzje', 'http://www.cdrinfo.pl/rss/rss_recenzje.php'),
(u'Konsole', 'http://konsole.cdrinfo.pl/rss/rss_konsole_news.xml'),
(u'Pliki', 'http://www.cdrinfo.pl/rss/rss_pliki.xml')
]
diff --git a/recipes/cgm_pl.recipe b/recipes/cgm_pl.recipe
index 8b0e4c084f..8f598c2803 100644
--- a/recipes/cgm_pl.recipe
+++ b/recipes/cgm_pl.recipe
@@ -14,31 +14,31 @@ class CGM(BasicNewsRecipe):
remove_empty_feeds= True
max_articles_per_feed = 100
no_stylesheets = True
- extra_css = 'div {color:black;} strong {color:black;} span {color:black;} p {color:black;} h2 {color:black;}'
+ extra_css = 'div {color:black;} strong {color:black;} span {color:black;} p {color:black;} h2 {color:black;} img {display: block;} ul.galleryImagesList {list-style: none;} li.item {float: left;} .calibrenavbar {clear: both;}'
remove_tags_before=dict(id='mainContent')
remove_tags_after=dict(name='div', attrs={'class':'fbContainer'})
remove_tags=[dict(name='div', attrs={'class':['fbContainer', 'socials']}),
dict(name='p', attrs={'class':['tagCloud', 'galleryAuthor']}),
- dict(id=['movieShare', 'container'])]
+ dict(id=['movieShare', 'container']), dict(name='br')]
feeds = [(u'Informacje', u'http://www.cgm.pl/rss.xml'), (u'Polecamy', u'http://www.cgm.pl/rss,4,news.xml'),
(u'Recenzje', u'http://www.cgm.pl/rss,1,news.xml')]
-
def preprocess_html(self, soup):
- gallery=soup.find('div', attrs={'class':'galleryFlash'})
- if gallery:
- img=gallery.div
+ gallery = soup.find('div', attrs={'class':'galleryFlash'})
+ if gallery and gallery.div:
+ img = gallery.div
gallery.img.extract()
if img:
- img=img['style']
- img='http://www.cgm.pl'+img[img.find('url(')+4:img.find(')')]
- gallery.contents[1].name='img'
- gallery.contents[1]['src']=img
+ img = img['style']
+ img = 'http://www.cgm.pl'+img[img.find('url(')+4:img.find(')')]
+ gallery.contents[1].name = 'img'
+ gallery.contents[1]['src'] = img
pos = len(gallery.contents)
gallery.insert(pos, BeautifulSoup('
'))
+
for item in soup.findAll(style=True):
del item['style']
- ad=soup.findAll('a')
+ ad = soup.findAll('a')
for r in ad:
if 'www.hustla.pl' in r['href'] or 'www.ebilet.pl' in r['href']:
r.extract()
diff --git a/recipes/dzieje_pl.recipe b/recipes/dzieje_pl.recipe
index cdd0630891..a8161420fb 100644
--- a/recipes/dzieje_pl.recipe
+++ b/recipes/dzieje_pl.recipe
@@ -16,7 +16,7 @@ class Dzieje(BasicNewsRecipe):
remove_javascript = True
no_stylesheets = True
keep_only_tags = [dict(name='h1', attrs={'class':'title'}), dict(id='content-area')]
- remove_tags = [dict(attrs={'class':'field field-type-computed field-field-tagi'}), dict(id='dogory')]
+ remove_tags = [dict(attrs={'class':'field field-type-computed field-field-tagi'}), dict(id='dogory'), dict(name='blockquote')]
#feeds = [(u'Dzieje', u'http://dzieje.pl/rss.xml')]
def append_page(self, soup, appendtag):
diff --git a/recipes/dziennik_pl.recipe b/recipes/dziennik_pl.recipe
index 44dd596324..cc9e4e1e77 100644
--- a/recipes/dziennik_pl.recipe
+++ b/recipes/dziennik_pl.recipe
@@ -18,22 +18,22 @@ class Dziennik_pl(BasicNewsRecipe):
remove_javascript = True
remove_empty_feeds = True
ignore_duplicate_articles = {'title', 'url'}
- extra_css = 'ul {list-style: none; padding: 0; margin: 0;} li {float: left;margin: 0 0.15em;}'
+ extra_css = 'ul {list-style: none; padding: 0; margin: 0;} .foto {float: left;} .clr {clear: both;}'
preprocess_regexps = [(re.compile("Komentarze:"), lambda m: ''), (re.compile('>>> CZYTAJ TAKŻE: ".*?"
'), lambda m: '')]
keep_only_tags = [dict(id='article')]
- remove_tags = [dict(name='div', attrs={'class':['art_box_dodatki', 'new_facebook_icons2', 'leftArt', 'article_print', 'quiz-widget', 'belka-spol', 'belka-spol belka-spol-bottom', 'art_data_tags', 'cl_right', 'boxRounded gal_inside']}), dict(name='a', attrs={'class':['komentarz', 'article_icon_addcommnent']})]
+ remove_tags = [dict(name='div', attrs={'class':['art_box_dodatki', 'new_facebook_icons2', 'leftArt', 'article_print', 'quiz-widget', 'belka-spol', 'belka-spol belka-spol-bottom', 'art_data_tags', 'cl_right', 'boxRounded gal_inside']}), dict(name='a', attrs={'class':['komentarz', 'article_icon_addcommnent']}), dict(name='ins'), dict(name='br')]
feeds = [(u'Wszystko', u'http://rss.dziennik.pl/Dziennik-PL/'),
- (u'Wiadomości', u'http://rss.dziennik.pl/Dziennik-Wiadomosci'),
- (u'Gospodarka', u'http://rss.dziennik.pl/Dziennik-Gospodarka'),
- (u'Kobieta', u'http://rss.dziennik.pl/Dziennik-Kobieta'),
- (u'Auto', u'http://rss.dziennik.pl/Dziennik-Auto'),
- (u'Rozrywka', u'http://rss.dziennik.pl/Dziennik-Rozrywka'),
- (u'Film', u'http://rss.dziennik.pl/Dziennik-Film'),
- (u'Muzyka' , u'http://rss.dziennik.pl/Dziennik-Muzyka'),
- (u'Kultura', u'http://rss.dziennik.pl/Dziennik-Kultura'),
- (u'Nauka', u'http://rss.dziennik.pl/Dziennik-Nauka'),
- (u'Podróże', u'http://rss.dziennik.pl/Dziennik-Podroze/'),
- (u'Nieruchomości', u'http://rss.dziennik.pl/Dziennik-Nieruchomosci')]
+ (u'Wiadomości', u'http://rss.dziennik.pl/Dziennik-Wiadomosci'),
+ (u'Gospodarka', u'http://rss.dziennik.pl/Dziennik-Gospodarka'),
+ (u'Kobieta', u'http://rss.dziennik.pl/Dziennik-Kobieta'),
+ (u'Auto', u'http://rss.dziennik.pl/Dziennik-Auto'),
+ (u'Rozrywka', u'http://rss.dziennik.pl/Dziennik-Rozrywka'),
+ (u'Film', u'http://rss.dziennik.pl/Dziennik-Film'),
+ (u'Muzyka' , u'http://rss.dziennik.pl/Dziennik-Muzyka'),
+ (u'Kultura', u'http://rss.dziennik.pl/Dziennik-Kultura'),
+ (u'Nauka', u'http://rss.dziennik.pl/Dziennik-Nauka'),
+ (u'Podróże', u'http://rss.dziennik.pl/Dziennik-Podroze/'),
+ (u'Nieruchomości', u'http://rss.dziennik.pl/Dziennik-Nieruchomosci')]
def skip_ad_pages(self, soup):
tag = soup.find(name='a', attrs={'title':'CZYTAJ DALEJ'})
diff --git a/recipes/echo_dnia.recipe b/recipes/echo_dnia.recipe
index 7dc913d062..8a44341b4a 100644
--- a/recipes/echo_dnia.recipe
+++ b/recipes/echo_dnia.recipe
@@ -25,7 +25,7 @@ class EchoDnia(BasicNewsRecipe):
keep_only_tags = [dict(id=['article', 'cover', 'photostory'])]
remove_tags = [dict(id=['articleTags', 'articleMeta', 'boxReadIt', 'articleGalleries', 'articleConnections',
'ForumArticleComments', 'articleRecommend', 'jedynkiLinks', 'articleGalleryConnections',
- 'photostoryConnections', 'articleEpaper', 'articlePoll', 'articleAlarm', 'articleByline']),
+ 'photostoryConnections', 'articleEpaper', 'articlePoll', 'articleAlarm', 'articleByline', 'articleZoomText']),
dict(attrs={'class':'articleFunctions'})]
feeds = [(u'Wszystkie', u'http://www.echodnia.eu/rss.xml'),
diff --git a/recipes/fotoblogia_pl.recipe b/recipes/fotoblogia_pl.recipe
index a482390e0c..4d9947e4ec 100644
--- a/recipes/fotoblogia_pl.recipe
+++ b/recipes/fotoblogia_pl.recipe
@@ -7,11 +7,11 @@ class Fotoblogia_pl(BasicNewsRecipe):
category = 'photography'
language = 'pl'
masthead_url = 'http://img.interia.pl/komputery/nimg/u/0/fotoblogia21.jpg'
- cover_url= 'http://fotoblogia.pl/images/2009/03/fotoblogia2.jpg'
+ cover_url = 'http://fotoblogia.pl/images/2009/03/fotoblogia2.jpg'
oldest_article = 7
max_articles_per_feed = 100
no_stylesheets = True
use_embedded_content = False
- keep_only_tags=[dict(name='div', attrs={'class':['post-view post-standard', 'photo-container']})]
- remove_tags=[dict(attrs={'class':['external fotoblogia', 'categories', 'tags']})]
+ keep_only_tags = [dict(name='article')]
+ remove_tags = [dict(attrs={'class':'article-related'})]
feeds = [(u'Wszystko', u'http://fotoblogia.pl/feed/rss2')]
diff --git a/recipes/gazeta_lubuska.recipe b/recipes/gazeta_lubuska.recipe
index f2a42b63b8..bc11f680b7 100644
--- a/recipes/gazeta_lubuska.recipe
+++ b/recipes/gazeta_lubuska.recipe
@@ -16,19 +16,12 @@ class GazetaLubuska(BasicNewsRecipe):
max_articles_per_feed = 100
remove_empty_feeds = True
no_stylesheets = True
+ use_embedded_content = False
ignore_duplicate_articles = {'title', 'url'}
- preprocess_regexps = [(re.compile(ur'Czytaj:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'Przeczytaj także:.*?', re.DOTALL|re.IGNORECASE), lambda match: ''),
- (re.compile(ur'Przeczytaj również:.*?', re.DOTALL|re.IGNORECASE), lambda match: ''), (re.compile(ur'Zobacz też:.*?', re.DOTALL|re.IGNORECASE), lambda match: '')]
-
- keep_only_tags = [dict(id=['article', 'cover', 'photostory'])]
- remove_tags = [dict(id=['articleTags', 'articleMeta', 'boxReadIt', 'articleGalleries', 'articleConnections',
- 'ForumArticleComments', 'articleRecommend', 'jedynkiLinks', 'articleGalleryConnections',
- 'photostoryConnections', 'articleEpaper', 'articlePoll', 'articleAlarm', 'articleByline']),
- dict(attrs={'class':'articleFunctions'})]
-
feeds = [(u'Wszystkie', u'http://www.gazetalubuska.pl/rss.xml'), (u'Dreznenko', u'http://www.gazetalubuska.pl/drezdenko.xml'), (u'G\u0142og\xf3w', u'http://www.gazetalubuska.pl/glogow.xml'), (u'Gorz\xf3w Wielkopolski', u'http://www.gazetalubuska.pl/gorzow-wielkopolski.xml'), (u'Gubin', u'http://www.gazetalubuska.pl/gubin.xml'), (u'Kostrzyn', u'http://www.gazetalubuska.pl/kostrzyn.xml'), (u'Krosno Odrza\u0144skie', u'http://www.gazetalubuska.pl/krosno-odrzanskie.xml'), (u'Lubsko', u'http://www.gazetalubuska.pl/lubsko.xml'), (u'Mi\u0119dzych\xf3d', u'http://www.gazetalubuska.pl/miedzychod.xml'), (u'Mi\u0119dzyrzecz', u'http://www.gazetalubuska.pl/miedzyrzecz.xml'), (u'Nowa S\xf3l', u'http://www.gazetalubuska.pl/nowa-sol.xml'), (u'S\u0142ubice', u'http://www.gazetalubuska.pl/slubice.xml'), (u'Strzelce Kraje\u0144skie', u'http://www.gazetalubuska.pl/strzelce-krajenskie.xml'), (u'Sulech\xf3w', u'http://www.gazetalubuska.pl/sulechow.xml'), (u'Sul\u0119cin', u'http://www.gazetalubuska.pl/sulecin.xml'), (u'\u015awi\u0119bodzin', u'http://www.gazetalubuska.pl/swiebodzin.xml'), (u'Wolsztyn', u'http://www.gazetalubuska.pl/wolsztyn.xml'), (u'Wschowa', u'http://www.gazetalubuska.pl/wschowa.xml'), (u'Zielona G\xf3ra', u'http://www.gazetalubuska.pl/zielona-gora.xml'), (u'\u017baga\u0144', u'http://www.gazetalubuska.pl/zagan.xml'), (u'\u017bary', u'http://www.gazetalubuska.pl/zary.xml'), (u'Sport', u'http://www.gazetalubuska.pl/sport.xml'), (u'Auto', u'http://www.gazetalubuska.pl/auto.xml'), (u'Dom', u'http://www.gazetalubuska.pl/dom.xml'), (u'Praca', u'http://www.gazetalubuska.pl/praca.xml'), (u'Zdrowie', u'http://www.gazetalubuska.pl/zdrowie.xml')]
+ keep_only_tags = [dict(id='article')]
def get_cover_url(self):
soup = self.index_to_soup(self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI')
@@ -37,33 +30,12 @@ class GazetaLubuska(BasicNewsRecipe):
self.cover_url = self.INDEX + soup.find(id='cover').find(name='img')['src']
return getattr(self, 'cover_url', self.cover_url)
- def append_page(self, soup, appendtag):
- tag = soup.find('span', attrs={'class':'photoNavigationPages'})
- if tag:
- number = int(tag.string.rpartition('/')[-1].replace(' ', ''))
- baseurl = self.INDEX + soup.find(attrs={'class':'photoNavigationNext'})['href'][:-1]
+    def decode_feedportal_url(self, url):  # turn the escaped target inside a feed link into a plain 'http://...' address
+        link = url.rpartition('l/0L0S')[2][:-12]  # text after the last 'l/0L0S', minus its 12-character tail
+        replaces = (('0B', '.'), ('0C', '/'), ('0H', ','), ('0D', '?'), ('0F', '='), ('0I', '_'), ('0A', '0'))  # '0A' goes last so the '0' it emits cannot start a false '0I' match
+        for code, char in replaces:
+            link = link.replace(code, char)
+        return 'http://' + link
- for r in appendtag.findAll(attrs={'class':'photoNavigation'}):
- r.extract()
- for nr in range(2, number+1):
- soup2 = self.index_to_soup(baseurl + str(nr))
- pagetext = soup2.find(id='photoContainer')
- if pagetext:
- pos = len(appendtag.contents)
- appendtag.insert(pos, pagetext)
- pagetext = soup2.find(attrs={'class':'photoMeta'})
- if pagetext:
- pos = len(appendtag.contents)
- appendtag.insert(pos, pagetext)
- pagetext = soup2.find(attrs={'class':'photoStoryText'})
- if pagetext:
- pos = len(appendtag.contents)
- appendtag.insert(pos, pagetext)
-
- comments = appendtag.findAll(text=lambda text:isinstance(text, Comment))
- for comment in comments:
- comment.extract()
-
- def preprocess_html(self, soup):
- self.append_page(soup, soup.body)
- return soup
+    def print_version(self, url):  # address fetched instead of `url`: the decoded feed link plus the print-template parameter
+        return ''.join((self.decode_feedportal_url(url), '&Template=printpicart'))
\ No newline at end of file
diff --git a/recipes/gazeta_pomorska.recipe b/recipes/gazeta_pomorska.recipe
index a4dc8ed1ea..b2a6932eb3 100644
--- a/recipes/gazeta_pomorska.recipe
+++ b/recipes/gazeta_pomorska.recipe
@@ -16,17 +16,9 @@ class GazetaPomorska(BasicNewsRecipe):
max_articles_per_feed = 100
remove_empty_feeds = True
no_stylesheets = True
+ use_embedded_content = False
ignore_duplicate_articles = {'title', 'url'}
- preprocess_regexps = [(re.compile(ur'Czytaj:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'Przeczytaj także:.*?', re.DOTALL|re.IGNORECASE), lambda match: ''),
- (re.compile(ur'Przeczytaj również:.*?', re.DOTALL|re.IGNORECASE), lambda match: ''), (re.compile(ur'Zobacz też:.*?', re.DOTALL|re.IGNORECASE), lambda match: '')]
-
- keep_only_tags = [dict(id=['article', 'cover', 'photostory'])]
- remove_tags = [dict(id=['articleTags', 'articleMeta', 'boxReadIt', 'articleGalleries', 'articleConnections',
- 'ForumArticleComments', 'articleRecommend', 'jedynkiLinks', 'articleGalleryConnections',
- 'photostoryConnections', 'articleEpaper', 'articlePoll', 'articleAlarm', 'articleByline']),
- dict(attrs={'class':'articleFunctions'})]
-
feeds = [(u'Wszystkie', u'http://www.pomorska.pl/rss.xml'),
(u'Region', u'http://www.pomorska.pl/region.xml'),
(u'Bydgoszcz', u'http://www.pomorska.pl/bydgoszcz.xml'),
@@ -57,6 +49,8 @@ class GazetaPomorska(BasicNewsRecipe):
#(u'Reporta\u017c', u'http://www.pomorska.pl/reportaz.xml'),
(u'Gospodarka', u'http://www.pomorska.pl/gospodarka.xml')]
+ keep_only_tags = [dict(id='article')]
+
def get_cover_url(self):
soup = self.index_to_soup(self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI')
nexturl = self.INDEX + soup.find(id='covers').find('a')['href']
@@ -64,33 +58,12 @@ class GazetaPomorska(BasicNewsRecipe):
self.cover_url = self.INDEX + soup.find(id='cover').find(name='img')['src']
return getattr(self, 'cover_url', self.cover_url)
- def append_page(self, soup, appendtag):
- tag = soup.find('span', attrs={'class':'photoNavigationPages'})
- if tag:
- number = int(tag.string.rpartition('/')[-1].replace(' ', ''))
- baseurl = self.INDEX + soup.find(attrs={'class':'photoNavigationNext'})['href'][:-1]
+    def decode_feedportal_url(self, url):  # turn the escaped target inside a feed link into a plain 'http://...' address
+        link = url.rpartition('l/0L0S')[2][:-12]  # text after the last 'l/0L0S', minus its 12-character tail
+        replaces = (('0B', '.'), ('0C', '/'), ('0H', ','), ('0D', '?'), ('0F', '='), ('0I', '_'), ('0A', '0'))  # '0A' goes last so the '0' it emits cannot start a false '0I' match
+        for code, char in replaces:
+            link = link.replace(code, char)
+        return 'http://' + link
- for r in appendtag.findAll(attrs={'class':'photoNavigation'}):
- r.extract()
- for nr in range(2, number+1):
- soup2 = self.index_to_soup(baseurl + str(nr))
- pagetext = soup2.find(id='photoContainer')
- if pagetext:
- pos = len(appendtag.contents)
- appendtag.insert(pos, pagetext)
- pagetext = soup2.find(attrs={'class':'photoMeta'})
- if pagetext:
- pos = len(appendtag.contents)
- appendtag.insert(pos, pagetext)
- pagetext = soup2.find(attrs={'class':'photoStoryText'})
- if pagetext:
- pos = len(appendtag.contents)
- appendtag.insert(pos, pagetext)
-
- comments = appendtag.findAll(text=lambda text:isinstance(text, Comment))
- for comment in comments:
- comment.extract()
-
- def preprocess_html(self, soup):
- self.append_page(soup, soup.body)
- return soup
+    def print_version(self, url):  # address fetched instead of `url`: the decoded feed link plus the print-template parameter
+        return ''.join((self.decode_feedportal_url(url), '&Template=printpicart'))
diff --git a/recipes/gazeta_wspolczesna.recipe b/recipes/gazeta_wspolczesna.recipe
index 6648d8eb1a..357d37c18f 100644
--- a/recipes/gazeta_wspolczesna.recipe
+++ b/recipes/gazeta_wspolczesna.recipe
@@ -16,19 +16,13 @@ class GazetaWspolczesna(BasicNewsRecipe):
max_articles_per_feed = 100
remove_empty_feeds = True
no_stylesheets = True
+ use_embedded_content = False
ignore_duplicate_articles = {'title', 'url'}
-
- preprocess_regexps = [(re.compile(ur'Czytaj:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'Przeczytaj także:.*?', re.DOTALL|re.IGNORECASE), lambda match: ''),
- (re.compile(ur'Przeczytaj również:.*?', re.DOTALL|re.IGNORECASE), lambda match: ''), (re.compile(ur'Zobacz też:.*?', re.DOTALL|re.IGNORECASE), lambda match: '')]
- keep_only_tags = [dict(id=['article', 'cover', 'photostory'])]
- remove_tags = [dict(id=['articleTags', 'articleMeta', 'boxReadIt', 'articleGalleries', 'articleConnections',
- 'ForumArticleComments', 'articleRecommend', 'jedynkiLinks', 'articleGalleryConnections',
- 'photostoryConnections', 'articleEpaper', 'articlePoll', 'articleAlarm', 'articleByline']),
- dict(attrs={'class':'articleFunctions'})]
-
feeds = [(u'Wszystkie', u'http://www.wspolczesna.pl/rss.xml'), (u'August\xf3w', u'http://www.wspolczesna.pl/augustow.xml'), (u'Bia\u0142ystok', u'http://www.wspolczesna.pl/bialystok.xml'), (u'Bielsk Podlaski', u'http://www.wspolczesna.pl/bielsk.xml'), (u'E\u0142k', u'http://www.wspolczesna.pl/elk.xml'), (u'Grajewo', u'http://www.wspolczesna.pl/grajewo.xml'), (u'Go\u0142dap', u'http://www.wspolczesna.pl/goldap.xml'), (u'Hajn\xf3wka', u'http://www.wspolczesna.pl/hajnowka.xml'), (u'Kolno', u'http://www.wspolczesna.pl/kolno.xml'), (u'\u0141om\u017ca', u'http://www.wspolczesna.pl/lomza.xml'), (u'Mo\u0144ki', u'http://www.wspolczesna.pl/monki.xml'), (u'Olecko', u'http://www.wspolczesna.pl/olecko.xml'), (u'Ostro\u0142\u0119ka', u'http://www.wspolczesna.pl/ostroleka.xml'), (u'Powiat Bia\u0142ostocki', u'http://www.wspolczesna.pl/powiat.xml'), (u'Sejny', u'http://www.wspolczesna.pl/sejny.xml'), (u'Siemiatycze', u'http://www.wspolczesna.pl/siemiatycze.xml'), (u'Sok\xf3\u0142ka', u'http://www.wspolczesna.pl/sokolka.xml'), (u'Suwa\u0142ki', u'http://www.wspolczesna.pl/suwalki.xml'), (u'Wysokie Mazowieckie', u'http://www.wspolczesna.pl/wysokie.xml'), (u'Zambr\xf3w', u'http://www.wspolczesna.pl/zambrow.xml'), (u'Sport', u'http://www.wspolczesna.pl/sport.xml'), (u'Praca', u'http://www.wspolczesna.pl/praca.xml'), (u'Dom', u'http://www.wspolczesna.pl/dom.xml'), (u'Auto', u'http://www.wspolczesna.pl/auto.xml'), (u'Zdrowie', u'http://www.wspolczesna.pl/zdrowie.xml')]
+ keep_only_tags = [dict(id='article')]
+
def get_cover_url(self):
soup = self.index_to_soup(self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI')
nexturl = self.INDEX + soup.find(id='covers').find('a')['href']
@@ -36,33 +30,12 @@ class GazetaWspolczesna(BasicNewsRecipe):
self.cover_url = self.INDEX + soup.find(id='cover').find(name='img')['src']
return getattr(self, 'cover_url', self.cover_url)
- def append_page(self, soup, appendtag):
- tag = soup.find('span', attrs={'class':'photoNavigationPages'})
- if tag:
- number = int(tag.string.rpartition('/')[-1].replace(' ', ''))
- baseurl = self.INDEX + soup.find(attrs={'class':'photoNavigationNext'})['href'][:-1]
+    def decode_feedportal_url(self, url):  # turn the escaped target inside a feed link into a plain 'http://...' address
+        link = url.rpartition('l/0L0S')[2][:-12]  # text after the last 'l/0L0S', minus its 12-character tail
+        replaces = (('0B', '.'), ('0C', '/'), ('0H', ','), ('0D', '?'), ('0F', '='), ('0I', '_'), ('0A', '0'))  # '0A' goes last so the '0' it emits cannot start a false '0I' match
+        for code, char in replaces:
+            link = link.replace(code, char)
+        return 'http://' + link
- for r in appendtag.findAll(attrs={'class':'photoNavigation'}):
- r.extract()
- for nr in range(2, number+1):
- soup2 = self.index_to_soup(baseurl + str(nr))
- pagetext = soup2.find(id='photoContainer')
- if pagetext:
- pos = len(appendtag.contents)
- appendtag.insert(pos, pagetext)
- pagetext = soup2.find(attrs={'class':'photoMeta'})
- if pagetext:
- pos = len(appendtag.contents)
- appendtag.insert(pos, pagetext)
- pagetext = soup2.find(attrs={'class':'photoStoryText'})
- if pagetext:
- pos = len(appendtag.contents)
- appendtag.insert(pos, pagetext)
-
- comments = appendtag.findAll(text=lambda text:isinstance(text, Comment))
- for comment in comments:
- comment.extract()
-
- def preprocess_html(self, soup):
- self.append_page(soup, soup.body)
- return soup
+    def print_version(self, url):  # address fetched instead of `url`: the decoded feed link plus the print-template parameter
+        return ''.join((self.decode_feedportal_url(url), '&Template=printpicart'))
diff --git a/recipes/gazeta_wyborcza.recipe b/recipes/gazeta_wyborcza.recipe
index 653c776723..db8d56816b 100644
--- a/recipes/gazeta_wyborcza.recipe
+++ b/recipes/gazeta_wyborcza.recipe
@@ -99,9 +99,8 @@ class Gazeta_Wyborcza(BasicNewsRecipe):
def get_cover_url(self):
soup = self.index_to_soup('http://wyborcza.pl/0,76762,3751429.html')
- cover = soup.find(id='GWmini2')
- soup = self.index_to_soup('http://wyborcza.pl/' + cover.contents[3].a['href'])
- self.cover_url = 'http://wyborcza.pl' + soup.img['src']
+ cover = soup.find(attrs={'class':'gallerycontent'})  # look up the element with class 'gallerycontent'; NOTE(review): no guard for a missing element - confirm against the live page
+ self.cover_url = cover.ul.li.a.img['src'].replace('P.jpg', '.jpg')  # take the src of the img under ul > li > a and swap 'P.jpg' for '.jpg'
return getattr(self, 'cover_url', self.cover_url)
def image_url_processor(self, baseurl, url):
diff --git a/recipes/gcn.recipe b/recipes/gcn.recipe
index 5f403bfd73..a1b488002f 100644
--- a/recipes/gcn.recipe
+++ b/recipes/gcn.recipe
@@ -18,14 +18,7 @@ class GCN(BasicNewsRecipe):
no_stylesheets = True
ignore_duplicate_articles = {'title', 'url'}
remove_attributes = ['style']
- preprocess_regexps = [(re.compile(ur'Czytaj:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'Przeczytaj także:.*?', re.DOTALL|re.IGNORECASE), lambda match: ''),
- (re.compile(ur'Przeczytaj również:.*?', re.DOTALL|re.IGNORECASE), lambda match: ''), (re.compile(ur'Zobacz też:.*?', re.DOTALL|re.IGNORECASE), lambda match: '')]
-
- keep_only_tags = [dict(id=['article', 'cover', 'photostory'])]
- remove_tags = [dict(id=['articleTags', 'articleMeta', 'boxReadIt', 'articleGalleries', 'articleConnections',
- 'ForumArticleComments', 'articleRecommend', 'jedynkiLinks', 'articleGalleryConnections',
- 'photostoryConnections', 'articleEpaper', 'articlePoll', 'articleAlarm', 'articleByline']),
- dict(attrs={'class':'articleFunctions'})]
+ use_embedded_content = False
feeds = [(u'Wszystkie', u'http://www.nowiny24.pl/rss.xml'),
(u'Podkarpacie', u'http://www.nowiny24.pl/podkarpacie.xml'),
@@ -49,6 +42,8 @@ class GCN(BasicNewsRecipe):
(u'Zdrowie', u'http://www.nowiny24.pl/zdrowie.xml'),
(u'Wywiady', u'http://www.nowiny24.pl/wywiady.xml')]
+ keep_only_tags = [dict(id='article')]
+
def get_cover_url(self):
soup = self.index_to_soup(self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI')
nexturl = self.INDEX + soup.find(id='covers').find('a')['href']
@@ -56,33 +51,12 @@ class GCN(BasicNewsRecipe):
self.cover_url = self.INDEX + soup.find(id='cover').find(name='img')['src']
return getattr(self, 'cover_url', self.cover_url)
- def append_page(self, soup, appendtag):
- tag = soup.find('span', attrs={'class':'photoNavigationPages'})
- if tag:
- number = int(tag.string.rpartition('/')[-1].replace(' ', ''))
- baseurl = self.INDEX + soup.find(attrs={'class':'photoNavigationNext'})['href'][:-1]
+    def decode_feedportal_url(self, url):  # turn the escaped target inside a feed link into a plain 'http://...' address
+        link = url.rpartition('l/0L0S')[2][:-12]  # text after the last 'l/0L0S', minus its 12-character tail
+        replaces = (('0B', '.'), ('0C', '/'), ('0H', ','), ('0D', '?'), ('0F', '='), ('0I', '_'), ('0A', '0'))  # '0A' goes last so the '0' it emits cannot start a false '0I' match
+        for code, char in replaces:
+            link = link.replace(code, char)
+        return 'http://' + link
- for r in appendtag.findAll(attrs={'class':'photoNavigation'}):
- r.extract()
- for nr in range(2, number+1):
- soup2 = self.index_to_soup(baseurl + str(nr))
- pagetext = soup2.find(id='photoContainer')
- if pagetext:
- pos = len(appendtag.contents)
- appendtag.insert(pos, pagetext)
- pagetext = soup2.find(attrs={'class':'photoMeta'})
- if pagetext:
- pos = len(appendtag.contents)
- appendtag.insert(pos, pagetext)
- pagetext = soup2.find(attrs={'class':'photoStoryText'})
- if pagetext:
- pos = len(appendtag.contents)
- appendtag.insert(pos, pagetext)
-
- comments = appendtag.findAll(text=lambda text:isinstance(text, Comment))
- for comment in comments:
- comment.extract()
-
- def preprocess_html(self, soup):
- self.append_page(soup, soup.body)
- return soup
+    def print_version(self, url):  # address fetched instead of `url`: the decoded feed link plus the print-template parameter
+        return ''.join((self.decode_feedportal_url(url), '&Template=printpicart'))
\ No newline at end of file
diff --git a/recipes/gram_pl.recipe b/recipes/gram_pl.recipe
index 67d18737f9..2c5f2e37e3 100644
--- a/recipes/gram_pl.recipe
+++ b/recipes/gram_pl.recipe
@@ -16,7 +16,7 @@ class Gram_pl(BasicNewsRecipe):
#extra_css = 'h2 {font-style: italic; font-size:20px;} .picbox div {float: left;}'
cover_url=u'http://www.gram.pl/www/01/img/grampl_zima.png'
keep_only_tags= [dict(id='articleModule')]
- remove_tags = [dict(attrs={'class':['breadCrump', 'dymek', 'articleFooter', 'twitter-share-button']}), dict(name='aside')]
+ remove_tags = [dict(attrs={'class':['breadCrump', 'dymek', 'articleFooter', 'twitter-share-button']}), dict(name='aside'), dict(id='metaColumn')]
feeds = [(u'Informacje', u'http://www.gram.pl/feed_news.asp'),
(u'Publikacje', u'http://www.gram.pl/feed_news.asp?type=articles')
]
diff --git a/recipes/gry_online_pl.recipe b/recipes/gry_online_pl.recipe
index 2876a9b4e8..e9868155df 100644
--- a/recipes/gry_online_pl.recipe
+++ b/recipes/gry_online_pl.recipe
@@ -15,7 +15,8 @@ class GryOnlinePl(BasicNewsRecipe):
max_articles_per_feed = 100
no_stylesheets = True
keep_only_tags = [dict(name='div', attrs={'class':['gc660', 'gc660 S013', 'news_endpage_tit', 'news_container', 'news']})]
- remove_tags = [dict({'class':['nav-social', 'add-info', 'smlb', 'lista lista3 lista-gry', 'S013po', 'S013-npb', 'zm_gfx_cnt_bottom', 'ocen-txt', 'wiecej-txt', 'wiecej-txt2']})]
+ remove_tags = [dict({'class':['nav-social', 'add-info', 'smlb', 'lista lista3 lista-gry', 'S013po', 'S013-npb', 'zm_gfx_cnt_bottom', 'ocen-txt', 'wiecej-txt', 'wiecej-txt2',
+ 'twitter-share-button']})]
feeds = [
(u'Newsy', 'http://www.gry-online.pl/rss/news.xml'),
('Teksty', u'http://www.gry-online.pl/rss/teksty.xml')]
@@ -44,7 +45,7 @@ class GryOnlinePl(BasicNewsRecipe):
pos = len(appendtag.contents)
appendtag.insert(pos, pagetext)
- for r in appendtag.findAll(attrs={'class':['n5p', 'add-info', 'twitter-share-button', 'lista lista3 lista-gry']}):
+ for r in appendtag.findAll(attrs={'class':['n5p', 'add-info', 'twitter-share-button', 'lista lista3 lista-gry', 'imh10b']}):
r.extract()
comments = appendtag.findAll(text=lambda text:isinstance(text, Comment))
for comment in comments:
@@ -80,7 +81,7 @@ class GryOnlinePl(BasicNewsRecipe):
[comment.extract() for comment in comments]
pos = len(appendtag.contents)
appendtag.insert(pos, pagetext)
- for r in appendtag.findAll(attrs={'class':['n5p', 'add-info', 'twitter-share-button', 'lista lista3 lista-gry', 'S018strony']}):
+ for r in appendtag.findAll(attrs={'class':['n5p', 'add-info', 'twitter-share-button', 'lista lista3 lista-gry', 'S018strony', 'imh10b']}):
r.extract()
comments = appendtag.findAll(text=lambda text:isinstance(text, Comment))
for comment in comments:
diff --git a/recipes/kosmonauta_pl.recipe b/recipes/kosmonauta_pl.recipe
index e93853bd57..e7dc85e0d4 100644
--- a/recipes/kosmonauta_pl.recipe
+++ b/recipes/kosmonauta_pl.recipe
@@ -30,4 +30,7 @@ class Kosmonauta(BasicNewsRecipe):
href = a['href']
if not href.startswith('http'):
a['href'] = self.INDEX + href
+ for a in soup.findAll(name='img'):
+ if a.has_key('style') and 'float:' in a['style']:
+ a['class'] = 'thumb-left'
return soup
\ No newline at end of file
diff --git a/recipes/kurier_poranny.recipe b/recipes/kurier_poranny.recipe
index 5c2f2d8e1e..3fe7033faf 100644
--- a/recipes/kurier_poranny.recipe
+++ b/recipes/kurier_poranny.recipe
@@ -17,16 +17,7 @@ class KurierPoranny(BasicNewsRecipe):
remove_empty_feeds = True
no_stylesheets = True
ignore_duplicate_articles = {'title', 'url'}
-
- preprocess_regexps = [(re.compile(ur'Czytaj:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'Przeczytaj także:.*?', re.DOTALL|re.IGNORECASE), lambda match: ''),
- (re.compile(ur'Przeczytaj również:.*?', re.DOTALL|re.IGNORECASE), lambda match: ''), (re.compile(ur'Zobacz też:.*?', re.DOTALL|re.IGNORECASE), lambda match: '')]
-
- keep_only_tags = [dict(id=['article', 'cover', 'photostory'])]
- remove_tags = [dict(id=['articleTags', 'articleMeta', 'boxReadIt', 'articleGalleries', 'articleConnections',
- 'ForumArticleComments', 'articleRecommend', 'jedynkiLinks', 'articleGalleryConnections',
- 'photostoryConnections', 'articleEpaper', 'articlePoll', 'articleAlarm', 'articleByline']),
- dict(attrs={'class':'articleFunctions'})]
-
+ use_embedded_content = False
feeds = [(u'Wszystkie', u'http://www.poranny.pl/rss.xml'),
(u'Białystok', u'http://www.poranny.pl/bialystok.xml'),
@@ -44,6 +35,8 @@ class KurierPoranny(BasicNewsRecipe):
(u'Auto', u'http://www.poranny.pl/auto.xml'),
(u'Polityka', u'http://www.poranny.pl/polityka.xml')]
+ keep_only_tags = [dict(id='article')]
+
def get_cover_url(self):
soup = self.index_to_soup(self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI')
nexturl = self.INDEX + soup.find(id='covers').find('a')['href']
@@ -51,34 +44,12 @@ class KurierPoranny(BasicNewsRecipe):
self.cover_url = self.INDEX + soup.find(id='cover').find(name='img')['src']
return getattr(self, 'cover_url', self.cover_url)
- def append_page(self, soup, appendtag):
- tag = soup.find('span', attrs={'class':'photoNavigationPages'})
- if tag:
- number = int(tag.string.rpartition('/')[-1].replace(' ', ''))
- baseurl = self.INDEX + soup.find(attrs={'class':'photoNavigationNext'})['href'][:-1]
+ def decode_feedportal_url(self, url):
+ link = url.rpartition('l/0L0S')[2][:-12]
+ replaces = (('0B', '.'), ('0C', '/'), ('0H', ','), ('0D', '?'), ('0F', '='), ('0A', '0'), ('0I', '_'))
+ for t in replaces:
+ link = link.replace(*t)
+ return 'http://' + link
- for r in appendtag.findAll(attrs={'class':'photoNavigation'}):
- r.extract()
- for nr in range(2, number+1):
- soup2 = self.index_to_soup(baseurl + str(nr))
- pagetext = soup2.find(id='photoContainer')
- if pagetext:
- pos = len(appendtag.contents)
- appendtag.insert(pos, pagetext)
- pagetext = soup2.find(attrs={'class':'photoMeta'})
- if pagetext:
- pos = len(appendtag.contents)
- appendtag.insert(pos, pagetext)
- pagetext = soup2.find(attrs={'class':'photoStoryText'})
- if pagetext:
- pos = len(appendtag.contents)
- appendtag.insert(pos, pagetext)
-
- comments = appendtag.findAll(text=lambda text:isinstance(text, Comment))
- for comment in comments:
- comment.extract()
-
-
- def preprocess_html(self, soup):
- self.append_page(soup, soup.body)
- return soup
+ def print_version(self, url):
+ return self.decode_feedportal_url(url) + '&Template=printpicart'
diff --git a/recipes/media2.recipe b/recipes/media2.recipe
index d685a90803..7cd542457f 100644
--- a/recipes/media2.recipe
+++ b/recipes/media2.recipe
@@ -12,6 +12,7 @@ class media2_pl(BasicNewsRecipe):
description = u'Media2.pl to jeden z najczęściej odwiedzanych serwisów dla profesjonalistów z branży medialnej, telekomunikacyjnej, public relations oraz nowych technologii.'
masthead_url = 'http://media2.pl/res/logo/www.png'
cover_url = 'http://media2.pl/res/logo/www.png'
+ INDEX = 'http://media2.pl'
remove_empty_feeds = True
oldest_article = 7
max_articles_per_feed = 100
@@ -22,10 +23,16 @@ class media2_pl(BasicNewsRecipe):
extra_css = '''.news-lead{font-weight: bold; }'''
keep_only_tags = [dict(name = 'div', attrs = {'class' : 'news-item tpl-big'})]
- remove_tags = [dict(name = 'span', attrs = {'class' : 'news-comments'}), dict(name = 'div', attrs = {'class' : 'item-sidebar'}), dict(name = 'div', attrs = {'class' : 'news-tags'})]
+ remove_tags = [dict(name = 'span', attrs = {'class' : 'news-comments'}), dict(name = 'div', attrs = {'class' : ['item-sidebar', 'news-inline-promo nobbtext']}),
+ dict(name = 'div', attrs = {'class' : 'news-tags'})]
feeds = [(u'Media2', u'http://feeds.feedburner.com/media2'), (u'Internet', u'http://feeds.feedburner.com/media2/internet'),
(u'Media', 'http://feeds.feedburner.com/media2/media'), (u'Telekomunikacja', 'http://feeds.feedburner.com/media2/telekomunikacja'),
(u'Reklama/PR', 'http://feeds.feedburner.com/media2/reklama-pr'), (u'Technologie', 'http://feeds.feedburner.com/media2/technologie'),
(u'Badania', 'http://feeds.feedburner.com/media2/badania')
- ]
\ No newline at end of file
+ ]
+
+ def image_url_processor(self, baseurl, url):
+ if url[0] == '/':
+ url = self.INDEX + url
+ return url
\ No newline at end of file
diff --git a/recipes/nto.recipe b/recipes/nto.recipe
index ed29db22ba..f38ccebeaf 100644
--- a/recipes/nto.recipe
+++ b/recipes/nto.recipe
@@ -17,18 +17,12 @@ class NTO(BasicNewsRecipe):
remove_empty_feeds = True
no_stylesheets = True
ignore_duplicate_articles = {'title', 'url'}
-
- preprocess_regexps = [(re.compile(ur'Czytaj:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'Przeczytaj także:.*?', re.DOTALL|re.IGNORECASE), lambda match: ''),
- (re.compile(ur'Przeczytaj również:.*?', re.DOTALL|re.IGNORECASE), lambda match: ''), (re.compile(ur'Zobacz też:.*?', re.DOTALL|re.IGNORECASE), lambda match: '')]
-
- keep_only_tags = [dict(id=['article', 'cover', 'photostory'])]
- remove_tags = [dict(id=['articleTags', 'articleMeta', 'boxReadIt', 'articleGalleries', 'articleConnections',
- 'ForumArticleComments', 'articleRecommend', 'jedynkiLinks', 'articleGalleryConnections',
- 'photostoryConnections', 'articleEpaper', 'articlePoll', 'articleAlarm', 'articleByline']),
- dict(attrs={'class':'articleFunctions'})]
+ use_embedded_content = False
feeds = [(u'Wszystkie', u'http://www.nto.pl/rss.xml'), (u'Region', u'http://www.nto.pl/region.xml'), (u'Brzeg', u'http://www.nto.pl/brzeg.xml'), (u'G\u0142ubczyce', u'http://www.nto.pl/glubczyce.xml'), (u'K\u0119dzierzyn-Ko\u017ale', u'http://www.nto.pl/kedzierzynkozle.xml'), (u'Kluczbork', u'http://www.nto.pl/kluczbork.xml'), (u'Krapkowice', u'http://www.nto.pl/krapkowice.xml'), (u'Namys\u0142\xf3w', u'http://www.nto.pl/namyslow.xml'), (u'Nysa', u'http://www.nto.pl/nysa.xml'), (u'Olesno', u'http://www.nto.pl/olesno.xml'), (u'Opole', u'http://www.nto.pl/opole.xml'), (u'Prudnik', u'http://www.nto.pl/prudnik.xml'), (u'Strzelce Opolskie', u'http://www.nto.pl/strzelceopolskie.xml'), (u'Sport', u'http://www.nto.pl/sport.xml'), (u'Polska i \u015bwiat', u'http://www.nto.pl/apps/pbcs.dll/section?Category=RSS&channel=KRAJSWIAT'), (u'Zdrowy styl', u'http://www.nto.pl/apps/pbcs.dll/section?Category=rss_zdrowystyl'), (u'Reporta\u017c', u'http://www.nto.pl/reportaz.xml'), (u'Studia', u'http://www.nto.pl/akademicka.xml')]
+ keep_only_tags = [dict(id='article')]
+
def get_cover_url(self):
soup = self.index_to_soup(self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI')
nexturl = self.INDEX + soup.find(id='covers').find('a')['href']
@@ -36,33 +30,12 @@ class NTO(BasicNewsRecipe):
self.cover_url = self.INDEX + soup.find(id='cover').find(name='img')['src']
return getattr(self, 'cover_url', self.cover_url)
- def append_page(self, soup, appendtag):
- tag = soup.find('span', attrs={'class':'photoNavigationPages'})
- if tag:
- number = int(tag.string.rpartition('/')[-1].replace(' ', ''))
- baseurl = self.INDEX + soup.find(attrs={'class':'photoNavigationNext'})['href'][:-1]
+ def decode_feedportal_url(self, url):
+ link = url.rpartition('l/0L0S')[2][:-12]
+ replaces = (('0B', '.'), ('0C', '/'), ('0H', ','), ('0D', '?'), ('0F', '='), ('0A', '0'), ('0I', '_'))
+ for t in replaces:
+ link = link.replace(*t)
+ return 'http://' + link
- for r in appendtag.findAll(attrs={'class':'photoNavigation'}):
- r.extract()
- for nr in range(2, number+1):
- soup2 = self.index_to_soup(baseurl + str(nr))
- pagetext = soup2.find(id='photoContainer')
- if pagetext:
- pos = len(appendtag.contents)
- appendtag.insert(pos, pagetext)
- pagetext = soup2.find(attrs={'class':'photoMeta'})
- if pagetext:
- pos = len(appendtag.contents)
- appendtag.insert(pos, pagetext)
- pagetext = soup2.find(attrs={'class':'photoStoryText'})
- if pagetext:
- pos = len(appendtag.contents)
- appendtag.insert(pos, pagetext)
-
- comments = appendtag.findAll(text=lambda text:isinstance(text, Comment))
- for comment in comments:
- comment.extract()
-
- def preprocess_html(self, soup):
- self.append_page(soup, soup.body)
- return soup
+ def print_version(self, url):
+ return self.decode_feedportal_url(url) + '&Template=printpicart'
diff --git a/recipes/presseurop.recipe b/recipes/presseurop.recipe
index ea06eb0c32..dacabcf43f 100644
--- a/recipes/presseurop.recipe
+++ b/recipes/presseurop.recipe
@@ -17,6 +17,7 @@ class presseurop(BasicNewsRecipe):
oldest_article = 7
max_articles_per_feed = 100
auto_cleanup = True
+ remove_empty_feeds = True
feeds = [
(u'Polityka', u'http://www.presseurop.eu/pl/taxonomy/term/1/%2A/feed'),
diff --git a/recipes/stopklatka.recipe b/recipes/stopklatka.recipe
index 1f629b1225..70198e497b 100644
--- a/recipes/stopklatka.recipe
+++ b/recipes/stopklatka.recipe
@@ -8,7 +8,7 @@ class Stopklatka_pl(BasicNewsRecipe):
category = 'movies'
language = 'pl'
encoding = 'utf-8'
- extra_css = 'img {display: block;} ul {list-style-type: none;} li {display: inline;}'
+ extra_css = 'img {display: block;} ul {list-style-type: none;}'
cover_url = 'http://static1.stopklatka.pl/images/20/19/11501.jpg'
use_embedded_content = False
oldest_article = 7
@@ -23,7 +23,7 @@ class Stopklatka_pl(BasicNewsRecipe):
keep_only_tags = [dict(attrs={'class':'asset-full-content default-asset-publisher show-asset-title'})]
remove_tags = [dict(attrs={'class':['metadata-entry metadata-tags', 'print-action', 'asset-flag', 'asset-ratings', 'ad-nav']}), dict(id='contest')]
- feeds = [(u'Wiadomo\u015bci', u'http://stopklatka.pl/wiadomosci/-/asset_publisher/Hl7x4Ku4GpZj/rss?p_p_cacheability=cacheLevelPage'), (u'Artyku\u0142y', u'http://stopklatka.pl/artykuly/-/asset_publisher/pKhn5s0IxqSc/rss?p_p_cacheability=cacheLevelPage'), (u'Premiery i zapowiedzi', u'http://stopklatka.pl/premiery-i-zapowiedzi?p_p_id=eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL&p_p_lifecycle=2&p_p_state=normal&p_p_mode=view&p_p_cacheability=cacheLevelPage&p_p_col_id=column-1&p_p_col_pos=1&p_p_col_count=3&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13393201&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13760176&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=15238425&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13470227&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13913324&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=20234402&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13917041&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13905169&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=14253975&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=21586017&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13540662&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=12999052&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=45280408&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=14826890&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13459998&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13070805&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=20209965&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=21741457&_eventsearch_WAR_eve
ntsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=35577381&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13530138&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13392987%2Cmartwe-zlo&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13760162%2Cuklad-zamkniety&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F15238403%2Cwszyscy-w-naszej-rodzinie&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13470213%2Cdonoma&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13913310%2Ccristiada&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F20234381%2Craj-wiara&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13917027%2Cintruz&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13905155%2Cspring-breakers&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F14253957%2Ckrudowie&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F21586004%2Cswieta-czworca&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13540648%2Ckwartet&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F12999038%2Cimagine&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F45280404%2Cdom-na-kolkach&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F14826876%2Cg-i-joe-odwet&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13459984%2Cnieobliczalni&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http
%3A%2F%2Fstopklatka.pl%2F-%2F13070591%2Csamotny-port-milosc&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F20209952%2Czanim-noc-nas-nie-rozdzieli&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F21741444%2Chemel&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35577377%2Czywie-bielarus-&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13530124%2Cpanaceum'), (u'Recenzje filmowe', u'http://stopklatka.pl/box-office/-/asset_publisher/3yxqotUEiqHJ/rss?p_p_cacheability=cacheLevelPage'), (u'Recenzje', u'http://stopklatka.pl/recenzje/-/asset_publisher/5oZ3s2J3L0tG/rss?p_p_cacheability=cacheLevelPage'), (u'Gwiazdy', u'http://stopklatka.pl/czerwony-dywan/-/asset_publisher/PqN7MDEGWGvh/rss?p_p_cacheability=cacheLevelPage'), (u'Wywiady Stopklatki', u'http://stopklatka.pl/wywiady/-/asset_publisher/uVh3OrZCaLd7/rss?p_p_cacheability=cacheLevelPage'), (u'Prosto z Hollywood', u'http://stopklatka.pl/wywiady-z-hollywood/-/asset_publisher/YsbU0JSoxb9G/rss?p_p_cacheability=cacheLevelPage'), (u'Plotki', u'http://stopklatka.pl/czerwony-dywan/-/asset_publisher/XuF8EGAkVeTa/rss?p_p_cacheability=cacheLevelPage'), (u'Box Office Polska', 
u'http://stopklatka.pl/box-office?p_p_id=eventsearch_WAR_eventsearchportlet_INSTANCE_Gqb98cI5dgSJ&p_p_lifecycle=2&p_p_state=normal&p_p_mode=view&p_p_cacheability=cacheLevelPage&p_p_col_id=column-1&p_p_col_pos=1&p_p_col_count=5&_eventsearch_WAR_eventsearchportlet_INSTANCE_Gqb98cI5dgSJ_assetEntryIds=47982267&_eventsearch_WAR_eventsearchportlet_INSTANCE_Gqb98cI5dgSJ_assetEntryIds=46685247&_eventsearch_WAR_eventsearchportlet_INSTANCE_Gqb98cI5dgSJ_assetEntryIds=45280313&_eventsearch_WAR_eventsearchportlet_INSTANCE_Gqb98cI5dgSJ_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47982263%2Cbox-office-weekendowy-polska-15-03-2013-17-03-2013&_eventsearch_WAR_eventsearchportlet_INSTANCE_Gqb98cI5dgSJ_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F46685243%2Cbox-office-weekendowy-polska-08-03-2013-10-03-2013&_eventsearch_WAR_eventsearchportlet_INSTANCE_Gqb98cI5dgSJ_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F45280309%2Cbox-office-weekendowy-polska-01-03-2013-03-03-2013'), (u'Box Office USA', u'http://stopklatka.pl/box-office?p_p_id=eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd&p_p_lifecycle=2&p_p_state=normal&p_p_mode=view&p_p_cacheability=cacheLevelPage&p_p_col_id=column-1&p_p_col_pos=2&p_p_col_count=5&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_assetEntryIds=49047234&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_assetEntryIds=48879358&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_assetEntryIds=47605057&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_assetEntryIds=47809980&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_assetEntryIds=46505246&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F49047230%2Cbox-office-weekendowy-stany-zjednoczone-22-03-2013-24-03-2013&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F48879354%2Cbox-office-weekendowy-stany-zjednoczone-22-03-2013-24-03-2013-estymacja-&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0a
gMxJd_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47605053%2Cbox-office-weekendowy-stany-zjednoczone-15-03-2013-17-03-2013-estymacja-&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47809976%2Cbox-office-weekendowy-stany-zjednoczone-15-03-2013-17-03-2013&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F46505242%2Cbox-office-weekendowy-stany-zjednoczone-08-03-2013-10-03-2013'), (u'Relacje', u'http://stopklatka.pl/czerwony-dywan/-/asset_publisher/IkgAkSFxLWV2/rss?p_p_cacheability=cacheLevelPage'), (u'Kalendarium imprez', u'http://stopklatka.pl/kalendarium-imprez?p_p_id=eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh&p_p_lifecycle=2&p_p_state=normal&p_p_mode=view&p_p_cacheability=cacheLevelPage&p_p_col_id=column-1&p_p_col_pos=1&p_p_col_count=3&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47628974&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47627805&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=45317244&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=48884855&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47629292&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=48884742&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35482058&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47627893&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35482076&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47627838&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=48167620&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35482067&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47811744&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35482049&_eventsearch_WAR_
eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47629615&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=45088670&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47628531&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35481950&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35481496&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35482022&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=19323743&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47628034&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47628064&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=45088819&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35482031&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35481415&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35481977&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=19323617&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35481932&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35481995&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47628970%2C4-festiwal-filmow-swiata-trzy-zywioly&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47627801%2Cwielka-podroz-krudow&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F45317240%2C6-przeglad-kina-rosyjskiego-nowe-kino-rosyjskie-&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F48884851%2C2-1-nowy-cykl-spotkan-literatury-z-filmem&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47629288%2C5-festiwal-polskich-filmow-krotkomet
razowych-short-waves&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F48884738%2Cmoico-enjoy-movies-przeglad-filmow-klasy-b-we-wroclawiu&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35482054%2C1-ogolnopolski-festiwal-polskiej-animacji-o-pla-2013-&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47627889%2Cviii-festiwal-filmow-afrykanskich-afrykamera-2013&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35482072%2C6-miedzynarodowy-festwial-kina-niezaleznego-off-plus-camera&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47627834%2C11-przeglad-filmow-studenckich-z-lodzkiej-filmowki-lodzia-po-wisle-&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F48167616%2Cweze-2013&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35482063%2Cxiv-festiwal-kina-amatorskiego-i-niezaleznego-kan&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47811740%2Cv-festiwal-muzyki-filmowej-krzysztofa-komedy&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35482045%2Ckonkurs-scenariuszowy-script-pro-2013&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47629611%2C9-miedzynarodowy-festiwal-filmowy-%E2%80%9Ezydowskie-motywy%E2%80%9D&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F45088666%2C15-przeglad-filmowy-cieszyn-kino-na-granicy-&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47628527%2Cdzien-filmowca-filmmaker-s-day&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35481946%2C10-planete-d
oc-film-festival&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35481492%2C66-miedzynarodowy-festiwal-filmowy-w-cannes&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35482018%2C16-festiwal-filmow-kultowych&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F19323739%2C53-krakowski-festiwal-filmowy&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47628030%2C4-festiwal-filmow-mlodziezowych-18&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47628060%2Cvii-superorbitalny-festiwal-filmow-amatorskich-soffa&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F45088815%2Cxv-miedzynarodowy-festiwal-filmow-przyrodniczych-im-wlodzimierza-puchalskiego&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35482027%2C32-koszalinski-festiwal-debiutow-filmowych-mlodzi-i-film-&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35481411%2C6-miedzynarodowy-festiwal-filmow-animowanych-animator-&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35481973%2C13-miedzynarodowy-festiwal-filmowy-sopot-film-festival&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F19323613%2C13-miedzynarodowy-festiwal-filmowy-t-mobile-nowe-horyzonty&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35481928%2C7-festiwal-filmu-i-sztuki-dwa-brzegi-w-kazimierzu-dolnym-i-janowcu&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35481991%2C3-miedzynarodowy-festiwal-filmu-i-muzyki-transatlantyk'), (u'Konkursy', 
u'http://stopklatka.pl/konkursy?p_p_id=eventsearch_WAR_eventsearchportlet_INSTANCE_Yks1FKgVbrOA&p_p_lifecycle=2&p_p_state=normal&p_p_mode=view&p_p_cacheability=cacheLevelPage&p_p_col_id=column-1&p_p_col_pos=3&p_p_col_count=5&_eventsearch_WAR_eventsearchportlet_INSTANCE_Yks1FKgVbrOA_assetEntryIds=47091950&_eventsearch_WAR_eventsearchportlet_INSTANCE_Yks1FKgVbrOA_assetEntryIds=48879762&_eventsearch_WAR_eventsearchportlet_INSTANCE_Yks1FKgVbrOA_assetEntryIds=48880109&_eventsearch_WAR_eventsearchportlet_INSTANCE_Yks1FKgVbrOA_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47091941%2Cksiazki-dwie-kobiety-&_eventsearch_WAR_eventsearchportlet_INSTANCE_Yks1FKgVbrOA_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F48879753%2Cdvd-rozmowy-noca-&_eventsearch_WAR_eventsearchportlet_INSTANCE_Yks1FKgVbrOA_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F48880000%2Cdvd-milosc-'), (u'Komiks Stopklatki', u'http://stopklatka.pl/komiks/-/asset_publisher/pKhn5s0IxqSc/rss?p_p_cacheability=cacheLevelPage')]
+ feeds = [(u'Wiadomo\u015bci', u'http://stopklatka.pl/wiadomosci/-/asset_publisher/Hl7x4Ku4GpZj/rss?p_p_cacheability=cacheLevelPage'), (u'Artyku\u0142y', u'http://stopklatka.pl/artykuly/-/asset_publisher/pKhn5s0IxqSc/rss?p_p_cacheability=cacheLevelPage'), (u'Premiery i zapowiedzi', u'http://stopklatka.pl/premiery-i-zapowiedzi?p_p_id=eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL&p_p_lifecycle=2&p_p_state=normal&p_p_mode=view&p_p_cacheability=cacheLevelPage&p_p_col_id=column-1&p_p_col_pos=1&p_p_col_count=3&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13393201&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13760176&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=15238425&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13470227&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13913324&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=20234402&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13917041&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13905169&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=14253975&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=21586017&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13540662&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=12999052&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=45280408&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=14826890&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13459998&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13070805&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=20209965&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=21741457&_eventsearch_WAR_eve
ntsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=35577381&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13530138&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13392987%2Cmartwe-zlo&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13760162%2Cuklad-zamkniety&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F15238403%2Cwszyscy-w-naszej-rodzinie&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13470213%2Cdonoma&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13913310%2Ccristiada&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F20234381%2Craj-wiara&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13917027%2Cintruz&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13905155%2Cspring-breakers&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F14253957%2Ckrudowie&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F21586004%2Cswieta-czworca&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13540648%2Ckwartet&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F12999038%2Cimagine&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F45280404%2Cdom-na-kolkach&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F14826876%2Cg-i-joe-odwet&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13459984%2Cnieobliczalni&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http
%3A%2F%2Fstopklatka.pl%2F-%2F13070591%2Csamotny-port-milosc&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F20209952%2Czanim-noc-nas-nie-rozdzieli&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F21741444%2Chemel&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35577377%2Czywie-bielarus-&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13530124%2Cpanaceum'), (u'Recenzje filmowe', u'http://stopklatka.pl/box-office/-/asset_publisher/3yxqotUEiqHJ/rss?p_p_cacheability=cacheLevelPage'), (u'Recenzje', u'http://stopklatka.pl/recenzje/-/asset_publisher/5oZ3s2J3L0tG/rss?p_p_cacheability=cacheLevelPage'), (u'Gwiazdy', u'http://stopklatka.pl/czerwony-dywan/-/asset_publisher/PqN7MDEGWGvh/rss?p_p_cacheability=cacheLevelPage'), (u'Wywiady Stopklatki', u'http://stopklatka.pl/wywiady/-/asset_publisher/uVh3OrZCaLd7/rss?p_p_cacheability=cacheLevelPage'), (u'Prosto z Hollywood', u'http://stopklatka.pl/wywiady-z-hollywood/-/asset_publisher/YsbU0JSoxb9G/rss?p_p_cacheability=cacheLevelPage'), (u'Plotki', u'http://stopklatka.pl/czerwony-dywan/-/asset_publisher/XuF8EGAkVeTa/rss?p_p_cacheability=cacheLevelPage'), (u'Box Office Polska', 
u'http://stopklatka.pl/box-office?p_p_id=eventsearch_WAR_eventsearchportlet_INSTANCE_Gqb98cI5dgSJ&p_p_lifecycle=2&p_p_state=normal&p_p_mode=view&p_p_cacheability=cacheLevelPage&p_p_col_id=column-1&p_p_col_pos=1&p_p_col_count=5&_eventsearch_WAR_eventsearchportlet_INSTANCE_Gqb98cI5dgSJ_assetEntryIds=47982267&_eventsearch_WAR_eventsearchportlet_INSTANCE_Gqb98cI5dgSJ_assetEntryIds=46685247&_eventsearch_WAR_eventsearchportlet_INSTANCE_Gqb98cI5dgSJ_assetEntryIds=45280313&_eventsearch_WAR_eventsearchportlet_INSTANCE_Gqb98cI5dgSJ_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47982263%2Cbox-office-weekendowy-polska-15-03-2013-17-03-2013&_eventsearch_WAR_eventsearchportlet_INSTANCE_Gqb98cI5dgSJ_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F46685243%2Cbox-office-weekendowy-polska-08-03-2013-10-03-2013&_eventsearch_WAR_eventsearchportlet_INSTANCE_Gqb98cI5dgSJ_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F45280309%2Cbox-office-weekendowy-polska-01-03-2013-03-03-2013'), (u'Box Office USA', u'http://stopklatka.pl/box-office?p_p_id=eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd&p_p_lifecycle=2&p_p_state=normal&p_p_mode=view&p_p_cacheability=cacheLevelPage&p_p_col_id=column-1&p_p_col_pos=2&p_p_col_count=5&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_assetEntryIds=49047234&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_assetEntryIds=48879358&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_assetEntryIds=47605057&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_assetEntryIds=47809980&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_assetEntryIds=46505246&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F49047230%2Cbox-office-weekendowy-stany-zjednoczone-22-03-2013-24-03-2013&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F48879354%2Cbox-office-weekendowy-stany-zjednoczone-22-03-2013-24-03-2013-estymacja-&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0a
gMxJd_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47605053%2Cbox-office-weekendowy-stany-zjednoczone-15-03-2013-17-03-2013-estymacja-&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47809976%2Cbox-office-weekendowy-stany-zjednoczone-15-03-2013-17-03-2013&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F46505242%2Cbox-office-weekendowy-stany-zjednoczone-08-03-2013-10-03-2013'), (u'Kalendarium imprez', u'http://stopklatka.pl/kalendarium-imprez?p_p_id=eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh&p_p_lifecycle=2&p_p_state=normal&p_p_mode=view&p_p_cacheability=cacheLevelPage&p_p_col_id=column-1&p_p_col_pos=1&p_p_col_count=3&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47628974&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47627805&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=45317244&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=48884855&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47629292&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=48884742&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35482058&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47627893&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35482076&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47627838&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=48167620&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35482067&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47811744&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35482049&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47629615&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh
_assetEntryIds=45088670&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47628531&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35481950&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35481496&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35482022&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=19323743&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47628034&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47628064&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=45088819&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35482031&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35481415&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35481977&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=19323617&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35481932&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35481995&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47628970%2C4-festiwal-filmow-swiata-trzy-zywioly&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47627801%2Cwielka-podroz-krudow&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F45317240%2C6-przeglad-kina-rosyjskiego-nowe-kino-rosyjskie-&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F48884851%2C2-1-nowy-cykl-spotkan-literatury-z-filmem&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47629288%2C5-festiwal-polskich-filmow-krotkometrazowych-short-waves&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F4888
4738%2Cmoico-enjoy-movies-przeglad-filmow-klasy-b-we-wroclawiu&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35482054%2C1-ogolnopolski-festiwal-polskiej-animacji-o-pla-2013-&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47627889%2Cviii-festiwal-filmow-afrykanskich-afrykamera-2013&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35482072%2C6-miedzynarodowy-festwial-kina-niezaleznego-off-plus-camera&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47627834%2C11-przeglad-filmow-studenckich-z-lodzkiej-filmowki-lodzia-po-wisle-&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F48167616%2Cweze-2013&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35482063%2Cxiv-festiwal-kina-amatorskiego-i-niezaleznego-kan&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47811740%2Cv-festiwal-muzyki-filmowej-krzysztofa-komedy&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35482045%2Ckonkurs-scenariuszowy-script-pro-2013&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47629611%2C9-miedzynarodowy-festiwal-filmowy-%E2%80%9Ezydowskie-motywy%E2%80%9D&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F45088666%2C15-przeglad-filmowy-cieszyn-kino-na-granicy-&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47628527%2Cdzien-filmowca-filmmaker-s-day&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35481946%2C10-planete-doc-film-festival&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35481492
%2C66-miedzynarodowy-festiwal-filmowy-w-cannes&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35482018%2C16-festiwal-filmow-kultowych&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F19323739%2C53-krakowski-festiwal-filmowy&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47628030%2C4-festiwal-filmow-mlodziezowych-18&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47628060%2Cvii-superorbitalny-festiwal-filmow-amatorskich-soffa&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F45088815%2Cxv-miedzynarodowy-festiwal-filmow-przyrodniczych-im-wlodzimierza-puchalskiego&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35482027%2C32-koszalinski-festiwal-debiutow-filmowych-mlodzi-i-film-&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35481411%2C6-miedzynarodowy-festiwal-filmow-animowanych-animator-&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35481973%2C13-miedzynarodowy-festiwal-filmowy-sopot-film-festival&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F19323613%2C13-miedzynarodowy-festiwal-filmowy-t-mobile-nowe-horyzonty&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35481928%2C7-festiwal-filmu-i-sztuki-dwa-brzegi-w-kazimierzu-dolnym-i-janowcu&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35481991%2C3-miedzynarodowy-festiwal-filmu-i-muzyki-transatlantyk'), (u'Konkursy', 
u'http://stopklatka.pl/konkursy?p_p_id=eventsearch_WAR_eventsearchportlet_INSTANCE_Yks1FKgVbrOA&p_p_lifecycle=2&p_p_state=normal&p_p_mode=view&p_p_cacheability=cacheLevelPage&p_p_col_id=column-1&p_p_col_pos=3&p_p_col_count=5&_eventsearch_WAR_eventsearchportlet_INSTANCE_Yks1FKgVbrOA_assetEntryIds=47091950&_eventsearch_WAR_eventsearchportlet_INSTANCE_Yks1FKgVbrOA_assetEntryIds=48879762&_eventsearch_WAR_eventsearchportlet_INSTANCE_Yks1FKgVbrOA_assetEntryIds=48880109&_eventsearch_WAR_eventsearchportlet_INSTANCE_Yks1FKgVbrOA_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47091941%2Cksiazki-dwie-kobiety-&_eventsearch_WAR_eventsearchportlet_INSTANCE_Yks1FKgVbrOA_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F48879753%2Cdvd-rozmowy-noca-&_eventsearch_WAR_eventsearchportlet_INSTANCE_Yks1FKgVbrOA_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F48880000%2Cdvd-milosc-'), (u'Komiks Stopklatki', u'http://stopklatka.pl/komiks/-/asset_publisher/pKhn5s0IxqSc/rss?p_p_cacheability=cacheLevelPage')]
def append_page(self, soup, appendtag):
tag = soup.find('a', attrs={'class': 'next'})
diff --git a/recipes/tablety_pl.recipe b/recipes/tablety_pl.recipe
index 827a86180e..04022726f8 100644
--- a/recipes/tablety_pl.recipe
+++ b/recipes/tablety_pl.recipe
@@ -14,5 +14,5 @@ class Tablety_pl(BasicNewsRecipe):
max_articles_per_feed = 100
preprocess_regexps = [(re.compile(ur'Przeczytaj także.*?
', re.DOTALL), lambda match: ''), (re.compile(ur'Przeczytaj koniecznie.*?
', re.DOTALL), lambda match: '')]
keep_only_tags = [dict(id='news_block')]
- remove_tags=[dict(attrs={'class':['comments_icon', 'wp-polls', 'entry-comments', 'wp-polls-loading', 'ts-fab-wrapper', 'entry-footer']})]
+ remove_tags=[dict(attrs={'class':['comments_icon', 'wp-polls', 'entry-comments', 'wp-polls-loading', 'ts-fab-wrapper', 'entry-footer', 'social-custom']})]
feeds = [(u'Najnowsze posty', u'http://www.tablety.pl/feed/')]
\ No newline at end of file