diff --git a/recipes/android_com_pl.recipe b/recipes/android_com_pl.recipe index a4a387d414..4b5455cfc6 100644 --- a/recipes/android_com_pl.recipe +++ b/recipes/android_com_pl.recipe @@ -1,3 +1,4 @@ +import re from calibre.web.feeds.news import BasicNewsRecipe class Android_com_pl(BasicNewsRecipe): @@ -6,8 +7,9 @@ class Android_com_pl(BasicNewsRecipe): description = u'Android.com.pl - to największe w Polsce centrum Android OS. Znajdziesz tu: nowości, forum, pomoc, recenzje, gry, aplikacje.' category = 'Android, mobile' language = 'pl' - use_embedded_content=True - cover_url =u'http://android.com.pl/wp-content/themes/android/images/logo.png' + use_embedded_content = True + cover_url = 'http://android.com.pl/wp-content/themes/android/images/logo.png' oldest_article = 8 max_articles_per_feed = 100 - feeds = [(u'Android', u'http://android.com.pl/feed/')] + preprocess_regexps = [(re.compile(ur'

.{,1}

', re.DOTALL), lambda match: '')] + feeds = [(u'Android', u'http://android.com.pl/feed/')] \ No newline at end of file diff --git a/recipes/astro_news_pl.recipe b/recipes/astro_news_pl.recipe index b7a15a9809..5189154f3b 100644 --- a/recipes/astro_news_pl.recipe +++ b/recipes/astro_news_pl.recipe @@ -10,13 +10,9 @@ class AstroNEWS(BasicNewsRecipe): #extra_css= 'table {text-align: left;}' no_stylesheets=True cover_url='http://news.astronet.pl/img/logo_news.jpg' + remove_attributes = ['width', 'align'] remove_tags=[dict(name='hr')] feeds = [(u'Wiadomości', u'http://news.astronet.pl/rss.cgi')] def print_version(self, url): return url.replace('astronet.pl/', 'astronet.pl/print.cgi?') - - def preprocess_html(self, soup): - for item in soup.findAll(align=True): - del item['align'] - return soup diff --git a/recipes/badania_net.recipe b/recipes/badania_net.recipe index c47e9b6f54..3ccf6be88c 100644 --- a/recipes/badania_net.recipe +++ b/recipes/badania_net.recipe @@ -14,7 +14,6 @@ class BadaniaNet(BasicNewsRecipe): preprocess_regexps = [(re.compile(r"

Tekst sponsoruje

", re.IGNORECASE), lambda m: ''),] remove_empty_feeds = True use_embedded_content = False - remove_tags = [dict(attrs={'class':['omc-flex-category', 'omc-comment-count', 'omc-single-tags']})] - remove_tags_after = dict(attrs={'class':'omc-single-tags'}) - keep_only_tags = [dict(id='omc-full-article')] + remove_tags = [] + keep_only_tags = [dict(name='article')] feeds = [(u'Psychologia', u'http://badania.net/category/psychologia/feed/'), (u'Technologie', u'http://badania.net/category/technologie/feed/'), (u'Biologia', u'http://badania.net/category/biologia/feed/'), (u'Chemia', u'http://badania.net/category/chemia/feed/'), (u'Zdrowie', u'http://badania.net/category/zdrowie/'), (u'Seks', u'http://badania.net/category/psychologia-ewolucyjna-tematyka-seks/feed/')] \ No newline at end of file diff --git a/recipes/cdrinfo_pl.recipe b/recipes/cdrinfo_pl.recipe index 2e75eee622..db5fd386f9 100644 --- a/recipes/cdrinfo_pl.recipe +++ b/recipes/cdrinfo_pl.recipe @@ -19,14 +19,16 @@ class cdrinfo(BasicNewsRecipe): no_stylesheets = True remove_empty_feeds = True remove_javascript = True - remove_attributes = ['style'] - preprocess_regexps = [(re.compile(u']*?>Uprzejmie prosimy o przestrzeganie netykiety.+?www\.gravatar\.com\.

', re.DOTALL), lambda match: '')] + remove_attributes = ['style', 'onmouseover'] + preprocess_regexps = [(re.compile(u']*?>Uprzejmie prosimy o przestrzeganie netykiety.+?www\.gravatar\.com\.

', re.DOTALL), lambda match: ''), + (re.compile(u']*?>.{,2}

', re.DOTALL), lambda match: '')] ignore_duplicate_articles = {'title', 'url'} keep_only_tags = [dict(name='input', attrs={'name':'ref'}), dict(id=['text', 'text2'])] - remove_tags = [dict(attrs={'class':['navigation', 'sociable', 'last6news']}), dict(name='hr'), dict(id='respond')] + remove_tags = [dict(attrs={'class':['navigation', 'sociable', 'last6news']}), dict(name=['hr', 'br']), dict(id='respond')] remove_tags_after = dict(id='artnawigacja') - feeds = [(u'Wiadomości', 'http://feeds.feedburner.com/cdrinfo'), (u'Recenzje', 'http://www.cdrinfo.pl/rss/rss_recenzje.php'), + feeds = [(u'Wiadomości', 'http://feeds.feedburner.com/cdrinfo'), + (u'Recenzje', 'http://www.cdrinfo.pl/rss/rss_recenzje.php'), (u'Konsole', 'http://konsole.cdrinfo.pl/rss/rss_konsole_news.xml'), (u'Pliki', 'http://www.cdrinfo.pl/rss/rss_pliki.xml') ] diff --git a/recipes/cgm_pl.recipe b/recipes/cgm_pl.recipe index 8b0e4c084f..8f598c2803 100644 --- a/recipes/cgm_pl.recipe +++ b/recipes/cgm_pl.recipe @@ -14,31 +14,31 @@ class CGM(BasicNewsRecipe): remove_empty_feeds= True max_articles_per_feed = 100 no_stylesheets = True - extra_css = 'div {color:black;} strong {color:black;} span {color:black;} p {color:black;} h2 {color:black;}' + extra_css = 'div {color:black;} strong {color:black;} span {color:black;} p {color:black;} h2 {color:black;} img {display: block;} ul.galleryImagesList {list-style: none;} li.item {float: left;} .calibrenavbar {clear: both;}' remove_tags_before=dict(id='mainContent') remove_tags_after=dict(name='div', attrs={'class':'fbContainer'}) remove_tags=[dict(name='div', attrs={'class':['fbContainer', 'socials']}), dict(name='p', attrs={'class':['tagCloud', 'galleryAuthor']}), - dict(id=['movieShare', 'container'])] + dict(id=['movieShare', 'container']), dict(name='br')] feeds = [(u'Informacje', u'http://www.cgm.pl/rss.xml'), (u'Polecamy', u'http://www.cgm.pl/rss,4,news.xml'), (u'Recenzje', u'http://www.cgm.pl/rss,1,news.xml')] - def preprocess_html(self, soup): - gallery=soup.find('div', attrs={'class':'galleryFlash'}) - if gallery: - img=gallery.div + gallery = soup.find('div', attrs={'class':'galleryFlash'}) + if gallery and gallery.div: + img = gallery.div gallery.img.extract() if img: - img=img['style'] - img='http://www.cgm.pl'+img[img.find('url(')+4:img.find(')')] - gallery.contents[1].name='img' - gallery.contents[1]['src']=img + img = img['style'] + img = 'http://www.cgm.pl'+img[img.find('url(')+4:img.find(')')] + gallery.contents[1].name = 'img' + gallery.contents[1]['src'] = img pos = len(gallery.contents) gallery.insert(pos, BeautifulSoup('
')) + for item in soup.findAll(style=True): del item['style'] - ad=soup.findAll('a') + ad = soup.findAll('a') for r in ad: if 'www.hustla.pl' in r['href'] or 'www.ebilet.pl' in r['href']: r.extract() diff --git a/recipes/dzieje_pl.recipe b/recipes/dzieje_pl.recipe index cdd0630891..a8161420fb 100644 --- a/recipes/dzieje_pl.recipe +++ b/recipes/dzieje_pl.recipe @@ -16,7 +16,7 @@ class Dzieje(BasicNewsRecipe): remove_javascript = True no_stylesheets = True keep_only_tags = [dict(name='h1', attrs={'class':'title'}), dict(id='content-area')] - remove_tags = [dict(attrs={'class':'field field-type-computed field-field-tagi'}), dict(id='dogory')] + remove_tags = [dict(attrs={'class':'field field-type-computed field-field-tagi'}), dict(id='dogory'), dict(name='blockquote')] #feeds = [(u'Dzieje', u'http://dzieje.pl/rss.xml')] def append_page(self, soup, appendtag): diff --git a/recipes/dziennik_pl.recipe b/recipes/dziennik_pl.recipe index 44dd596324..cc9e4e1e77 100644 --- a/recipes/dziennik_pl.recipe +++ b/recipes/dziennik_pl.recipe @@ -18,22 +18,22 @@ class Dziennik_pl(BasicNewsRecipe): remove_javascript = True remove_empty_feeds = True ignore_duplicate_articles = {'title', 'url'} - extra_css = 'ul {list-style: none; padding: 0; margin: 0;} li {float: left;margin: 0 0.15em;}' + extra_css = 'ul {list-style: none; padding: 0; margin: 0;} .foto {float: left;} .clr {clear: both;}' preprocess_regexps = [(re.compile("Komentarze:"), lambda m: ''), (re.compile('

>>> CZYTAJ TAKŻE: ".*?"

'), lambda m: '')] keep_only_tags = [dict(id='article')] - remove_tags = [dict(name='div', attrs={'class':['art_box_dodatki', 'new_facebook_icons2', 'leftArt', 'article_print', 'quiz-widget', 'belka-spol', 'belka-spol belka-spol-bottom', 'art_data_tags', 'cl_right', 'boxRounded gal_inside']}), dict(name='a', attrs={'class':['komentarz', 'article_icon_addcommnent']})] + remove_tags = [dict(name='div', attrs={'class':['art_box_dodatki', 'new_facebook_icons2', 'leftArt', 'article_print', 'quiz-widget', 'belka-spol', 'belka-spol belka-spol-bottom', 'art_data_tags', 'cl_right', 'boxRounded gal_inside']}), dict(name='a', attrs={'class':['komentarz', 'article_icon_addcommnent']}), dict(name='ins'), dict(name='br')] feeds = [(u'Wszystko', u'http://rss.dziennik.pl/Dziennik-PL/'), - (u'Wiadomości', u'http://rss.dziennik.pl/Dziennik-Wiadomosci'), - (u'Gospodarka', u'http://rss.dziennik.pl/Dziennik-Gospodarka'), - (u'Kobieta', u'http://rss.dziennik.pl/Dziennik-Kobieta'), - (u'Auto', u'http://rss.dziennik.pl/Dziennik-Auto'), - (u'Rozrywka', u'http://rss.dziennik.pl/Dziennik-Rozrywka'), - (u'Film', u'http://rss.dziennik.pl/Dziennik-Film'), - (u'Muzyka' , u'http://rss.dziennik.pl/Dziennik-Muzyka'), - (u'Kultura', u'http://rss.dziennik.pl/Dziennik-Kultura'), - (u'Nauka', u'http://rss.dziennik.pl/Dziennik-Nauka'), - (u'Podróże', u'http://rss.dziennik.pl/Dziennik-Podroze/'), - (u'Nieruchomości', u'http://rss.dziennik.pl/Dziennik-Nieruchomosci')] + (u'Wiadomości', u'http://rss.dziennik.pl/Dziennik-Wiadomosci'), + (u'Gospodarka', u'http://rss.dziennik.pl/Dziennik-Gospodarka'), + (u'Kobieta', u'http://rss.dziennik.pl/Dziennik-Kobieta'), + (u'Auto', u'http://rss.dziennik.pl/Dziennik-Auto'), + (u'Rozrywka', u'http://rss.dziennik.pl/Dziennik-Rozrywka'), + (u'Film', u'http://rss.dziennik.pl/Dziennik-Film'), + (u'Muzyka' , u'http://rss.dziennik.pl/Dziennik-Muzyka'), + (u'Kultura', u'http://rss.dziennik.pl/Dziennik-Kultura'), + (u'Nauka', u'http://rss.dziennik.pl/Dziennik-Nauka'), + (u'Podróże', u'http://rss.dziennik.pl/Dziennik-Podroze/'), + (u'Nieruchomości', u'http://rss.dziennik.pl/Dziennik-Nieruchomosci')] def skip_ad_pages(self, soup): tag = soup.find(name='a', attrs={'title':'CZYTAJ DALEJ'}) diff --git a/recipes/echo_dnia.recipe b/recipes/echo_dnia.recipe index 7dc913d062..8a44341b4a 100644 --- a/recipes/echo_dnia.recipe +++ b/recipes/echo_dnia.recipe @@ -25,7 +25,7 @@ class EchoDnia(BasicNewsRecipe): keep_only_tags = [dict(id=['article', 'cover', 'photostory'])] remove_tags = [dict(id=['articleTags', 'articleMeta', 'boxReadIt', 'articleGalleries', 'articleConnections', 'ForumArticleComments', 'articleRecommend', 'jedynkiLinks', 'articleGalleryConnections', - 'photostoryConnections', 'articleEpaper', 'articlePoll', 'articleAlarm', 'articleByline']), + 'photostoryConnections', 'articleEpaper', 'articlePoll', 'articleAlarm', 'articleByline', 'articleZoomText']), dict(attrs={'class':'articleFunctions'})] feeds = [(u'Wszystkie', u'http://www.echodnia.eu/rss.xml'), diff --git a/recipes/fotoblogia_pl.recipe b/recipes/fotoblogia_pl.recipe index a482390e0c..4d9947e4ec 100644 --- a/recipes/fotoblogia_pl.recipe +++ b/recipes/fotoblogia_pl.recipe @@ -7,11 +7,11 @@ class Fotoblogia_pl(BasicNewsRecipe): category = 'photography' language = 'pl' masthead_url = 'http://img.interia.pl/komputery/nimg/u/0/fotoblogia21.jpg' - cover_url= 'http://fotoblogia.pl/images/2009/03/fotoblogia2.jpg' + cover_url = 'http://fotoblogia.pl/images/2009/03/fotoblogia2.jpg' oldest_article = 7 max_articles_per_feed = 100 no_stylesheets = True use_embedded_content = False - keep_only_tags=[dict(name='div', attrs={'class':['post-view post-standard', 'photo-container']})] - remove_tags=[dict(attrs={'class':['external fotoblogia', 'categories', 'tags']})] + keep_only_tags = [dict(name='article')] + remove_tags = [dict(attrs={'class':'article-related'})] feeds = [(u'Wszystko', u'http://fotoblogia.pl/feed/rss2')] diff --git a/recipes/gazeta_lubuska.recipe b/recipes/gazeta_lubuska.recipe index f2a42b63b8..bc11f680b7 100644 --- a/recipes/gazeta_lubuska.recipe +++ b/recipes/gazeta_lubuska.recipe @@ -16,19 +16,12 @@ class GazetaLubuska(BasicNewsRecipe): max_articles_per_feed = 100 remove_empty_feeds = True no_stylesheets = True + use_embedded_content = False ignore_duplicate_articles = {'title', 'url'} - preprocess_regexps = [(re.compile(ur'Czytaj:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'Przeczytaj także:.*?', re.DOTALL|re.IGNORECASE), lambda match: ''), - (re.compile(ur'Przeczytaj również:.*?', re.DOTALL|re.IGNORECASE), lambda match: ''), (re.compile(ur'Zobacz też:.*?', re.DOTALL|re.IGNORECASE), lambda match: '')] - - keep_only_tags = [dict(id=['article', 'cover', 'photostory'])] - remove_tags = [dict(id=['articleTags', 'articleMeta', 'boxReadIt', 'articleGalleries', 'articleConnections', - 'ForumArticleComments', 'articleRecommend', 'jedynkiLinks', 'articleGalleryConnections', - 'photostoryConnections', 'articleEpaper', 'articlePoll', 'articleAlarm', 'articleByline']), - dict(attrs={'class':'articleFunctions'})] - feeds = [(u'Wszystkie', u'http://www.gazetalubuska.pl/rss.xml'), (u'Dreznenko', u'http://www.gazetalubuska.pl/drezdenko.xml'), (u'G\u0142og\xf3w', u'http://www.gazetalubuska.pl/glogow.xml'), (u'Gorz\xf3w Wielkopolski', u'http://www.gazetalubuska.pl/gorzow-wielkopolski.xml'), (u'Gubin', u'http://www.gazetalubuska.pl/gubin.xml'), (u'Kostrzyn', u'http://www.gazetalubuska.pl/kostrzyn.xml'), (u'Krosno Odrza\u0144skie', u'http://www.gazetalubuska.pl/krosno-odrzanskie.xml'), (u'Lubsko', u'http://www.gazetalubuska.pl/lubsko.xml'), (u'Mi\u0119dzych\xf3d', u'http://www.gazetalubuska.pl/miedzychod.xml'), (u'Mi\u0119dzyrzecz', u'http://www.gazetalubuska.pl/miedzyrzecz.xml'), (u'Nowa S\xf3l', u'http://www.gazetalubuska.pl/nowa-sol.xml'), (u'S\u0142ubice', u'http://www.gazetalubuska.pl/slubice.xml'), (u'Strzelce Kraje\u0144skie', u'http://www.gazetalubuska.pl/strzelce-krajenskie.xml'), (u'Sulech\xf3w', u'http://www.gazetalubuska.pl/sulechow.xml'), (u'Sul\u0119cin', u'http://www.gazetalubuska.pl/sulecin.xml'), (u'\u015awi\u0119bodzin', u'http://www.gazetalubuska.pl/swiebodzin.xml'), (u'Wolsztyn', u'http://www.gazetalubuska.pl/wolsztyn.xml'), (u'Wschowa', u'http://www.gazetalubuska.pl/wschowa.xml'), (u'Zielona G\xf3ra', u'http://www.gazetalubuska.pl/zielona-gora.xml'), (u'\u017baga\u0144', u'http://www.gazetalubuska.pl/zagan.xml'), (u'\u017bary', u'http://www.gazetalubuska.pl/zary.xml'), (u'Sport', u'http://www.gazetalubuska.pl/sport.xml'), (u'Auto', u'http://www.gazetalubuska.pl/auto.xml'), (u'Dom', u'http://www.gazetalubuska.pl/dom.xml'), (u'Praca', u'http://www.gazetalubuska.pl/praca.xml'), (u'Zdrowie', u'http://www.gazetalubuska.pl/zdrowie.xml')] + keep_only_tags = [dict(id='article')] def get_cover_url(self): soup = self.index_to_soup(self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI') @@ -37,33 +30,12 @@ class GazetaLubuska(BasicNewsRecipe): self.cover_url = self.INDEX + soup.find(id='cover').find(name='img')['src'] return getattr(self, 'cover_url', self.cover_url) - def append_page(self, soup, appendtag): - tag = soup.find('span', attrs={'class':'photoNavigationPages'}) - if tag: - number = int(tag.string.rpartition('/')[-1].replace(' ', '')) - baseurl = self.INDEX + soup.find(attrs={'class':'photoNavigationNext'})['href'][:-1] + def decode_feedportal_url(self, url): + link = url.rpartition('l/0L0S')[2][:-12] + replaces = (('0B', '.'), ('0C', '/'), ('0H', ','), ('0D', '?'), ('0F', '='), ('0A', '0'), ('0I', '_')) + for t in replaces: + link = link.replace(*t) + return 'http://' + link - for r in appendtag.findAll(attrs={'class':'photoNavigation'}): - r.extract() - for nr in range(2, number+1): - soup2 = self.index_to_soup(baseurl + str(nr)) - pagetext = soup2.find(id='photoContainer') - if pagetext: - pos = len(appendtag.contents) - appendtag.insert(pos, pagetext) - pagetext = soup2.find(attrs={'class':'photoMeta'}) - if pagetext: - pos = len(appendtag.contents) - appendtag.insert(pos, pagetext) - pagetext = soup2.find(attrs={'class':'photoStoryText'}) - if pagetext: - pos = len(appendtag.contents) - appendtag.insert(pos, pagetext) - - comments = appendtag.findAll(text=lambda text:isinstance(text, Comment)) - for comment in comments: - comment.extract() - - def preprocess_html(self, soup): - self.append_page(soup, soup.body) - return soup + def print_version(self, url): + return self.decode_feedportal_url(url) + '&Template=printpicart' \ No newline at end of file diff --git a/recipes/gazeta_pomorska.recipe b/recipes/gazeta_pomorska.recipe index a4dc8ed1ea..b2a6932eb3 100644 --- a/recipes/gazeta_pomorska.recipe +++ b/recipes/gazeta_pomorska.recipe @@ -16,17 +16,9 @@ class GazetaPomorska(BasicNewsRecipe): max_articles_per_feed = 100 remove_empty_feeds = True no_stylesheets = True + use_embedded_content = False ignore_duplicate_articles = {'title', 'url'} - preprocess_regexps = [(re.compile(ur'Czytaj:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'Przeczytaj także:.*?', re.DOTALL|re.IGNORECASE), lambda match: ''), - (re.compile(ur'Przeczytaj również:.*?', re.DOTALL|re.IGNORECASE), lambda match: ''), (re.compile(ur'Zobacz też:.*?', re.DOTALL|re.IGNORECASE), lambda match: '')] - - keep_only_tags = [dict(id=['article', 'cover', 'photostory'])] - remove_tags = [dict(id=['articleTags', 'articleMeta', 'boxReadIt', 'articleGalleries', 'articleConnections', - 'ForumArticleComments', 'articleRecommend', 'jedynkiLinks', 'articleGalleryConnections', - 'photostoryConnections', 'articleEpaper', 'articlePoll', 'articleAlarm', 'articleByline']), - dict(attrs={'class':'articleFunctions'})] - feeds = [(u'Wszystkie', u'http://www.pomorska.pl/rss.xml'), (u'Region', u'http://www.pomorska.pl/region.xml'), (u'Bydgoszcz', u'http://www.pomorska.pl/bydgoszcz.xml'), @@ -57,6 +49,8 @@ class GazetaPomorska(BasicNewsRecipe): #(u'Reporta\u017c', u'http://www.pomorska.pl/reportaz.xml'), (u'Gospodarka', u'http://www.pomorska.pl/gospodarka.xml')] + keep_only_tags = [dict(id='article')] + def get_cover_url(self): soup = self.index_to_soup(self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI') nexturl = self.INDEX + soup.find(id='covers').find('a')['href'] @@ -64,33 +58,12 @@ class GazetaPomorska(BasicNewsRecipe): self.cover_url = self.INDEX + soup.find(id='cover').find(name='img')['src'] return getattr(self, 'cover_url', self.cover_url) - def append_page(self, soup, appendtag): - tag = soup.find('span', attrs={'class':'photoNavigationPages'}) - if tag: - number = int(tag.string.rpartition('/')[-1].replace(' ', '')) - baseurl = self.INDEX + soup.find(attrs={'class':'photoNavigationNext'})['href'][:-1] + def decode_feedportal_url(self, url): + link = url.rpartition('l/0L0S')[2][:-12] + replaces = (('0B', '.'), ('0C', '/'), ('0H', ','), ('0D', '?'), ('0F', '='), ('0A', '0'), ('0I', '_')) + for t in replaces: + link = link.replace(*t) + return 'http://' + link - for r in appendtag.findAll(attrs={'class':'photoNavigation'}): - r.extract() - for nr in range(2, number+1): - soup2 = self.index_to_soup(baseurl + str(nr)) - pagetext = soup2.find(id='photoContainer') - if pagetext: - pos = len(appendtag.contents) - appendtag.insert(pos, pagetext) - pagetext = soup2.find(attrs={'class':'photoMeta'}) - if pagetext: - pos = len(appendtag.contents) - appendtag.insert(pos, pagetext) - pagetext = soup2.find(attrs={'class':'photoStoryText'}) - if pagetext: - pos = len(appendtag.contents) - appendtag.insert(pos, pagetext) - - comments = appendtag.findAll(text=lambda text:isinstance(text, Comment)) - for comment in comments: - comment.extract() - - def preprocess_html(self, soup): - self.append_page(soup, soup.body) - return soup + def print_version(self, url): + return self.decode_feedportal_url(url) + '&Template=printpicart' diff --git a/recipes/gazeta_wspolczesna.recipe b/recipes/gazeta_wspolczesna.recipe index 6648d8eb1a..357d37c18f 100644 --- a/recipes/gazeta_wspolczesna.recipe +++ b/recipes/gazeta_wspolczesna.recipe @@ -16,19 +16,13 @@ class GazetaWspolczesna(BasicNewsRecipe): max_articles_per_feed = 100 remove_empty_feeds = True no_stylesheets = True + use_embedded_content = False ignore_duplicate_articles = {'title', 'url'} - - preprocess_regexps = [(re.compile(ur'Czytaj:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'Przeczytaj także:.*?', re.DOTALL|re.IGNORECASE), lambda match: ''), - (re.compile(ur'Przeczytaj również:.*?', re.DOTALL|re.IGNORECASE), lambda match: ''), (re.compile(ur'Zobacz też:.*?', re.DOTALL|re.IGNORECASE), lambda match: '')] - keep_only_tags = [dict(id=['article', 'cover', 'photostory'])] - remove_tags = [dict(id=['articleTags', 'articleMeta', 'boxReadIt', 'articleGalleries', 'articleConnections', - 'ForumArticleComments', 'articleRecommend', 'jedynkiLinks', 'articleGalleryConnections', - 'photostoryConnections', 'articleEpaper', 'articlePoll', 'articleAlarm', 'articleByline']), - dict(attrs={'class':'articleFunctions'})] - feeds = [(u'Wszystkie', u'http://www.wspolczesna.pl/rss.xml'), (u'August\xf3w', u'http://www.wspolczesna.pl/augustow.xml'), (u'Bia\u0142ystok', u'http://www.wspolczesna.pl/bialystok.xml'), (u'Bielsk Podlaski', u'http://www.wspolczesna.pl/bielsk.xml'), (u'E\u0142k', u'http://www.wspolczesna.pl/elk.xml'), (u'Grajewo', u'http://www.wspolczesna.pl/grajewo.xml'), (u'Go\u0142dap', u'http://www.wspolczesna.pl/goldap.xml'), (u'Hajn\xf3wka', u'http://www.wspolczesna.pl/hajnowka.xml'), (u'Kolno', u'http://www.wspolczesna.pl/kolno.xml'), (u'\u0141om\u017ca', u'http://www.wspolczesna.pl/lomza.xml'), (u'Mo\u0144ki', u'http://www.wspolczesna.pl/monki.xml'), (u'Olecko', u'http://www.wspolczesna.pl/olecko.xml'), (u'Ostro\u0142\u0119ka', u'http://www.wspolczesna.pl/ostroleka.xml'), (u'Powiat Bia\u0142ostocki', u'http://www.wspolczesna.pl/powiat.xml'), (u'Sejny', u'http://www.wspolczesna.pl/sejny.xml'), (u'Siemiatycze', u'http://www.wspolczesna.pl/siemiatycze.xml'), (u'Sok\xf3\u0142ka', u'http://www.wspolczesna.pl/sokolka.xml'), (u'Suwa\u0142ki', u'http://www.wspolczesna.pl/suwalki.xml'), (u'Wysokie Mazowieckie', u'http://www.wspolczesna.pl/wysokie.xml'), (u'Zambr\xf3w', u'http://www.wspolczesna.pl/zambrow.xml'), (u'Sport', u'http://www.wspolczesna.pl/sport.xml'), (u'Praca', u'http://www.wspolczesna.pl/praca.xml'), (u'Dom', u'http://www.wspolczesna.pl/dom.xml'), (u'Auto', u'http://www.wspolczesna.pl/auto.xml'), (u'Zdrowie', u'http://www.wspolczesna.pl/zdrowie.xml')] + keep_only_tags = [dict(id='article')] + def get_cover_url(self): soup = self.index_to_soup(self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI') nexturl = self.INDEX + soup.find(id='covers').find('a')['href'] @@ -36,33 +30,12 @@ class GazetaWspolczesna(BasicNewsRecipe): self.cover_url = self.INDEX + soup.find(id='cover').find(name='img')['src'] return getattr(self, 'cover_url', self.cover_url) - def append_page(self, soup, appendtag): - tag = soup.find('span', attrs={'class':'photoNavigationPages'}) - if tag: - number = int(tag.string.rpartition('/')[-1].replace(' ', '')) - baseurl = self.INDEX + soup.find(attrs={'class':'photoNavigationNext'})['href'][:-1] + def decode_feedportal_url(self, url): + link = url.rpartition('l/0L0S')[2][:-12] + replaces = (('0B', '.'), ('0C', '/'), ('0H', ','), ('0D', '?'), ('0F', '='), ('0A', '0'), ('0I', '_')) + for t in replaces: + link = link.replace(*t) + return 'http://' + link - for r in appendtag.findAll(attrs={'class':'photoNavigation'}): - r.extract() - for nr in range(2, number+1): - soup2 = self.index_to_soup(baseurl + str(nr)) - pagetext = soup2.find(id='photoContainer') - if pagetext: - pos = len(appendtag.contents) - appendtag.insert(pos, pagetext) - pagetext = soup2.find(attrs={'class':'photoMeta'}) - if pagetext: - pos = len(appendtag.contents) - appendtag.insert(pos, pagetext) - pagetext = soup2.find(attrs={'class':'photoStoryText'}) - if pagetext: - pos = len(appendtag.contents) - appendtag.insert(pos, pagetext) - - comments = appendtag.findAll(text=lambda text:isinstance(text, Comment)) - for comment in comments: - comment.extract() - - def preprocess_html(self, soup): - self.append_page(soup, soup.body) - return soup + def print_version(self, url): + return self.decode_feedportal_url(url) + '&Template=printpicart' diff --git a/recipes/gazeta_wyborcza.recipe b/recipes/gazeta_wyborcza.recipe index 653c776723..db8d56816b 100644 --- a/recipes/gazeta_wyborcza.recipe +++ b/recipes/gazeta_wyborcza.recipe @@ -99,9 +99,8 @@ class Gazeta_Wyborcza(BasicNewsRecipe): def get_cover_url(self): soup = self.index_to_soup('http://wyborcza.pl/0,76762,3751429.html') - cover = soup.find(id='GWmini2') - soup = self.index_to_soup('http://wyborcza.pl/' + cover.contents[3].a['href']) - self.cover_url = 'http://wyborcza.pl' + soup.img['src'] + cover = soup.find(attrs={'class':'gallerycontent'}) + self.cover_url = cover.ul.li.a.img['src'].replace('P.jpg', '.jpg') return getattr(self, 'cover_url', self.cover_url) def image_url_processor(self, baseurl, url): diff --git a/recipes/gcn.recipe b/recipes/gcn.recipe index 5f403bfd73..a1b488002f 100644 --- a/recipes/gcn.recipe +++ b/recipes/gcn.recipe @@ -18,14 +18,7 @@ class GCN(BasicNewsRecipe): no_stylesheets = True ignore_duplicate_articles = {'title', 'url'} remove_attributes = ['style'] - preprocess_regexps = [(re.compile(ur'Czytaj:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'Przeczytaj także:.*?', re.DOTALL|re.IGNORECASE), lambda match: ''), - (re.compile(ur'Przeczytaj również:.*?', re.DOTALL|re.IGNORECASE), lambda match: ''), (re.compile(ur'Zobacz też:.*?', re.DOTALL|re.IGNORECASE), lambda match: '')] - - keep_only_tags = [dict(id=['article', 'cover', 'photostory'])] - remove_tags = [dict(id=['articleTags', 'articleMeta', 'boxReadIt', 'articleGalleries', 'articleConnections', - 'ForumArticleComments', 'articleRecommend', 'jedynkiLinks', 'articleGalleryConnections', - 'photostoryConnections', 'articleEpaper', 'articlePoll', 'articleAlarm', 'articleByline']), - dict(attrs={'class':'articleFunctions'})] + use_embedded_content = False feeds = [(u'Wszystkie', u'http://www.nowiny24.pl/rss.xml'), (u'Podkarpacie', u'http://www.nowiny24.pl/podkarpacie.xml'), @@ -49,6 +42,8 @@ class GCN(BasicNewsRecipe): (u'Zdrowie', u'http://www.nowiny24.pl/zdrowie.xml'), (u'Wywiady', u'http://www.nowiny24.pl/wywiady.xml')] + keep_only_tags = [dict(id='article')] + def get_cover_url(self): soup = self.index_to_soup(self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI') nexturl = self.INDEX + soup.find(id='covers').find('a')['href'] @@ -56,33 +51,12 @@ class GCN(BasicNewsRecipe): self.cover_url = self.INDEX + soup.find(id='cover').find(name='img')['src'] return getattr(self, 'cover_url', self.cover_url) - def append_page(self, soup, appendtag): - tag = soup.find('span', attrs={'class':'photoNavigationPages'}) - if tag: - number = int(tag.string.rpartition('/')[-1].replace(' ', '')) - baseurl = self.INDEX + soup.find(attrs={'class':'photoNavigationNext'})['href'][:-1] + def decode_feedportal_url(self, url): + link = url.rpartition('l/0L0S')[2][:-12] + replaces = (('0B', '.'), ('0C', '/'), ('0H', ','), ('0D', '?'), ('0F', '='), ('0A', '0'), ('0I', '_')) + for t in replaces: + link = link.replace(*t) + return 'http://' + link - for r in appendtag.findAll(attrs={'class':'photoNavigation'}): - r.extract() - for nr in range(2, number+1): - soup2 = self.index_to_soup(baseurl + str(nr)) - pagetext = soup2.find(id='photoContainer') - if pagetext: - pos = len(appendtag.contents) - appendtag.insert(pos, pagetext) - pagetext = soup2.find(attrs={'class':'photoMeta'}) - if pagetext: - pos = len(appendtag.contents) - appendtag.insert(pos, pagetext) - pagetext = soup2.find(attrs={'class':'photoStoryText'}) - if pagetext: - pos = len(appendtag.contents) - appendtag.insert(pos, pagetext) - - comments = appendtag.findAll(text=lambda text:isinstance(text, Comment)) - for comment in comments: - comment.extract() - - def preprocess_html(self, soup): - self.append_page(soup, soup.body) - return soup + def print_version(self, url): + return self.decode_feedportal_url(url) + '&Template=printpicart' \ No newline at end of file diff --git a/recipes/gram_pl.recipe b/recipes/gram_pl.recipe index 67d18737f9..2c5f2e37e3 100644 --- a/recipes/gram_pl.recipe +++ b/recipes/gram_pl.recipe @@ -16,7 +16,7 @@ class Gram_pl(BasicNewsRecipe): #extra_css = 'h2 {font-style: italic; font-size:20px;} .picbox div {float: left;}' cover_url=u'http://www.gram.pl/www/01/img/grampl_zima.png' keep_only_tags= [dict(id='articleModule')] - remove_tags = [dict(attrs={'class':['breadCrump', 'dymek', 'articleFooter', 'twitter-share-button']}), dict(name='aside')] + remove_tags = [dict(attrs={'class':['breadCrump', 'dymek', 'articleFooter', 'twitter-share-button']}), dict(name='aside'), dict(id='metaColumn')] feeds = [(u'Informacje', u'http://www.gram.pl/feed_news.asp'), (u'Publikacje', u'http://www.gram.pl/feed_news.asp?type=articles') ] diff --git a/recipes/gry_online_pl.recipe b/recipes/gry_online_pl.recipe index 2876a9b4e8..e9868155df 100644 --- a/recipes/gry_online_pl.recipe +++ b/recipes/gry_online_pl.recipe @@ -15,7 +15,8 @@ class GryOnlinePl(BasicNewsRecipe): max_articles_per_feed = 100 no_stylesheets = True keep_only_tags = [dict(name='div', attrs={'class':['gc660', 'gc660 S013', 'news_endpage_tit', 'news_container', 'news']})] - remove_tags = [dict({'class':['nav-social', 'add-info', 'smlb', 'lista lista3 lista-gry', 'S013po', 'S013-npb', 'zm_gfx_cnt_bottom', 'ocen-txt', 'wiecej-txt', 'wiecej-txt2']})] + remove_tags = [dict({'class':['nav-social', 'add-info', 'smlb', 'lista lista3 lista-gry', 'S013po', 'S013-npb', 'zm_gfx_cnt_bottom', 'ocen-txt', 'wiecej-txt', 'wiecej-txt2', + 'twitter-share-button']})] feeds = [ (u'Newsy', 'http://www.gry-online.pl/rss/news.xml'), ('Teksty', u'http://www.gry-online.pl/rss/teksty.xml')] @@ -44,7 +45,7 @@ class GryOnlinePl(BasicNewsRecipe): pos = len(appendtag.contents) appendtag.insert(pos, pagetext) - for r in appendtag.findAll(attrs={'class':['n5p', 'add-info', 'twitter-share-button', 'lista lista3 lista-gry']}): + for r in appendtag.findAll(attrs={'class':['n5p', 'add-info', 'twitter-share-button', 'lista lista3 lista-gry', 'imh10b']}): r.extract() comments = appendtag.findAll(text=lambda text:isinstance(text, Comment)) for comment in comments: @@ -80,7 +81,7 @@ class GryOnlinePl(BasicNewsRecipe): [comment.extract() for comment in comments] pos = len(appendtag.contents) appendtag.insert(pos, pagetext) - for r in appendtag.findAll(attrs={'class':['n5p', 'add-info', 'twitter-share-button', 'lista lista3 lista-gry', 'S018strony']}): + for r in appendtag.findAll(attrs={'class':['n5p', 'add-info', 'twitter-share-button', 'lista lista3 lista-gry', 'S018strony', 'imh10b']}): r.extract() comments = appendtag.findAll(text=lambda text:isinstance(text, Comment)) for comment in comments: diff --git a/recipes/kosmonauta_pl.recipe b/recipes/kosmonauta_pl.recipe index e93853bd57..e7dc85e0d4 100644 --- a/recipes/kosmonauta_pl.recipe +++ b/recipes/kosmonauta_pl.recipe @@ -30,4 +30,7 @@ class Kosmonauta(BasicNewsRecipe): href = a['href'] if not href.startswith('http'): a['href'] = self.INDEX + href + for a in soup.findAll(name='img'): + if a.has_key('style') and 'float:' in a['style']: + a['class'] = 'thumb-left' return soup \ No newline at end of file diff --git a/recipes/kurier_poranny.recipe b/recipes/kurier_poranny.recipe index 5c2f2d8e1e..3fe7033faf 100644 --- a/recipes/kurier_poranny.recipe +++ b/recipes/kurier_poranny.recipe @@ -17,16 +17,7 @@ class KurierPoranny(BasicNewsRecipe): remove_empty_feeds = True no_stylesheets = True ignore_duplicate_articles = {'title', 'url'} - - preprocess_regexps = [(re.compile(ur'Czytaj:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'Przeczytaj także:.*?', re.DOTALL|re.IGNORECASE), lambda match: ''), - (re.compile(ur'Przeczytaj również:.*?', re.DOTALL|re.IGNORECASE), lambda match: ''), (re.compile(ur'Zobacz też:.*?', re.DOTALL|re.IGNORECASE), lambda match: '')] - - keep_only_tags = [dict(id=['article', 'cover', 'photostory'])] - remove_tags = [dict(id=['articleTags', 'articleMeta', 'boxReadIt', 'articleGalleries', 'articleConnections', - 'ForumArticleComments', 'articleRecommend', 'jedynkiLinks', 'articleGalleryConnections', - 'photostoryConnections', 'articleEpaper', 'articlePoll', 'articleAlarm', 'articleByline']), - dict(attrs={'class':'articleFunctions'})] - + use_embedded_content = False feeds = [(u'Wszystkie', u'http://www.poranny.pl/rss.xml'), (u'Białystok', u'http://www.poranny.pl/bialystok.xml'), @@ -44,6 +35,8 @@ class KurierPoranny(BasicNewsRecipe): (u'Auto', u'http://www.poranny.pl/auto.xml'), (u'Polityka', u'http://www.poranny.pl/polityka.xml')] + keep_only_tags = [dict(id='article')] + def get_cover_url(self): soup = self.index_to_soup(self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI') nexturl = self.INDEX + soup.find(id='covers').find('a')['href'] @@ -51,34 +44,12 @@ class KurierPoranny(BasicNewsRecipe): self.cover_url = self.INDEX + soup.find(id='cover').find(name='img')['src'] return getattr(self, 'cover_url', self.cover_url) - def append_page(self, soup, appendtag): - tag = soup.find('span', attrs={'class':'photoNavigationPages'}) - if tag: - number = int(tag.string.rpartition('/')[-1].replace(' ', '')) - baseurl = self.INDEX + soup.find(attrs={'class':'photoNavigationNext'})['href'][:-1] + def decode_feedportal_url(self, url): + link = url.rpartition('l/0L0S')[2][:-12] + replaces = (('0B', '.'), ('0C', '/'), ('0H', ','), ('0D', '?'), ('0F', '='), ('0A', '0'), ('0I', '_')) + for t in replaces: + link = link.replace(*t) + return 'http://' + link - for r in appendtag.findAll(attrs={'class':'photoNavigation'}): - r.extract() - for nr in range(2, number+1): - soup2 = self.index_to_soup(baseurl + str(nr)) - pagetext = soup2.find(id='photoContainer') - if pagetext: - pos = len(appendtag.contents) - appendtag.insert(pos, pagetext) - pagetext = soup2.find(attrs={'class':'photoMeta'}) - if pagetext: - pos = len(appendtag.contents) - appendtag.insert(pos, pagetext) - pagetext = soup2.find(attrs={'class':'photoStoryText'}) - if pagetext: - pos = len(appendtag.contents) - appendtag.insert(pos, pagetext) - - comments = appendtag.findAll(text=lambda text:isinstance(text, Comment)) - for comment in comments: - comment.extract() - - - def preprocess_html(self, soup): - self.append_page(soup, soup.body) - return soup + def print_version(self, url): + return self.decode_feedportal_url(url) + '&Template=printpicart' diff --git a/recipes/media2.recipe b/recipes/media2.recipe index d685a90803..7cd542457f 100644 --- a/recipes/media2.recipe +++ b/recipes/media2.recipe @@ -12,6 +12,7 @@ class media2_pl(BasicNewsRecipe): description = u'Media2.pl to jeden z najczęściej odwiedzanych serwisów dla profesjonalistów z branży medialnej, telekomunikacyjnej, public relations oraz nowych technologii.' masthead_url = 'http://media2.pl/res/logo/www.png' cover_url = 'http://media2.pl/res/logo/www.png' + INDEX = 'http://media2.pl' remove_empty_feeds = True oldest_article = 7 max_articles_per_feed = 100 @@ -22,10 +23,16 @@ class media2_pl(BasicNewsRecipe): extra_css = '''.news-lead{font-weight: bold; }''' keep_only_tags = [dict(name = 'div', attrs = {'class' : 'news-item tpl-big'})] - remove_tags = [dict(name = 'span', attrs = {'class' : 'news-comments'}), dict(name = 'div', attrs = {'class' : 'item-sidebar'}), dict(name = 'div', attrs = {'class' : 'news-tags'})] + remove_tags = [dict(name = 'span', attrs = {'class' : 'news-comments'}), dict(name = 'div', attrs = {'class' : ['item-sidebar', 'news-inline-promo nobbtext']}), + dict(name = 'div', attrs = {'class' : 'news-tags'})] feeds = [(u'Media2', u'http://feeds.feedburner.com/media2'), (u'Internet', u'http://feeds.feedburner.com/media2/internet'), (u'Media', 'http://feeds.feedburner.com/media2/media'), (u'Telekomunikacja', 'http://feeds.feedburner.com/media2/telekomunikacja'), (u'Reklama/PR', 'http://feeds.feedburner.com/media2/reklama-pr'), (u'Technologie', 'http://feeds.feedburner.com/media2/technologie'), (u'Badania', 'http://feeds.feedburner.com/media2/badania') - ] \ No newline at end of file + ] + + def image_url_processor(self, baseurl, url): + if url[0] == '/': + url = self.INDEX + url + return url \ No newline at end of file diff --git a/recipes/nto.recipe b/recipes/nto.recipe index ed29db22ba..f38ccebeaf 100644 --- a/recipes/nto.recipe +++ b/recipes/nto.recipe @@ -17,18 +17,12 @@ class NTO(BasicNewsRecipe): remove_empty_feeds = True no_stylesheets = True ignore_duplicate_articles = {'title', 'url'} - - preprocess_regexps = [(re.compile(ur'Czytaj:.*?', re.DOTALL), lambda match: ''), (re.compile(ur'Przeczytaj także:.*?', re.DOTALL|re.IGNORECASE), lambda match: ''), - (re.compile(ur'Przeczytaj również:.*?', re.DOTALL|re.IGNORECASE), lambda match: ''), (re.compile(ur'Zobacz też:.*?', re.DOTALL|re.IGNORECASE), lambda match: '')] - - keep_only_tags = [dict(id=['article', 'cover', 'photostory'])] - remove_tags = [dict(id=['articleTags', 'articleMeta', 'boxReadIt', 'articleGalleries', 'articleConnections', - 'ForumArticleComments', 'articleRecommend', 'jedynkiLinks', 'articleGalleryConnections', - 'photostoryConnections', 'articleEpaper', 'articlePoll', 'articleAlarm', 'articleByline']), - dict(attrs={'class':'articleFunctions'})] + use_embedded_content = False feeds = [(u'Wszystkie', u'http://www.nto.pl/rss.xml'), (u'Region', u'http://www.nto.pl/region.xml'), (u'Brzeg', u'http://www.nto.pl/brzeg.xml'), (u'G\u0142ubczyce', u'http://www.nto.pl/glubczyce.xml'), (u'K\u0119dzierzyn-Ko\u017ale', u'http://www.nto.pl/kedzierzynkozle.xml'), (u'Kluczbork', u'http://www.nto.pl/kluczbork.xml'), (u'Krapkowice', u'http://www.nto.pl/krapkowice.xml'), (u'Namys\u0142\xf3w', u'http://www.nto.pl/namyslow.xml'), (u'Nysa', u'http://www.nto.pl/nysa.xml'), (u'Olesno', u'http://www.nto.pl/olesno.xml'), (u'Opole', u'http://www.nto.pl/opole.xml'), (u'Prudnik', u'http://www.nto.pl/prudnik.xml'), (u'Strzelce Opolskie', u'http://www.nto.pl/strzelceopolskie.xml'), (u'Sport', u'http://www.nto.pl/sport.xml'), (u'Polska i \u015bwiat', u'http://www.nto.pl/apps/pbcs.dll/section?Category=RSS&channel=KRAJSWIAT'), (u'Zdrowy styl', u'http://www.nto.pl/apps/pbcs.dll/section?Category=rss_zdrowystyl'), (u'Reporta\u017c', u'http://www.nto.pl/reportaz.xml'), (u'Studia', u'http://www.nto.pl/akademicka.xml')] + keep_only_tags = [dict(id='article')] + def get_cover_url(self): soup = self.index_to_soup(self.INDEX + '/apps/pbcs.dll/section?Category=JEDYNKI') nexturl = self.INDEX + soup.find(id='covers').find('a')['href'] @@ -36,33 +30,12 @@ class NTO(BasicNewsRecipe): self.cover_url = self.INDEX + soup.find(id='cover').find(name='img')['src'] return getattr(self, 'cover_url', self.cover_url) - def append_page(self, soup, appendtag): - tag = soup.find('span', attrs={'class':'photoNavigationPages'}) - if tag: - number = int(tag.string.rpartition('/')[-1].replace(' ', '')) - baseurl = self.INDEX + soup.find(attrs={'class':'photoNavigationNext'})['href'][:-1] + def decode_feedportal_url(self, url): + link = url.rpartition('l/0L0S')[2][:-12] + replaces = (('0B', '.'), ('0C', '/'), ('0H', ','), ('0D', '?'), ('0F', '='), ('0A', '0'), ('0I', '_')) + for t in replaces: + link = link.replace(*t) + return 'http://' + link - for r in appendtag.findAll(attrs={'class':'photoNavigation'}): - r.extract() - for nr in range(2, number+1): - soup2 = self.index_to_soup(baseurl + str(nr)) - pagetext = soup2.find(id='photoContainer') - if pagetext: - pos = len(appendtag.contents) - appendtag.insert(pos, pagetext) - pagetext = soup2.find(attrs={'class':'photoMeta'}) - if pagetext: - pos = len(appendtag.contents) - appendtag.insert(pos, pagetext) - pagetext = soup2.find(attrs={'class':'photoStoryText'}) - if pagetext: - pos = len(appendtag.contents) - appendtag.insert(pos, pagetext) - - comments = appendtag.findAll(text=lambda text:isinstance(text, Comment)) - for comment in comments: - comment.extract() - - def preprocess_html(self, soup): - self.append_page(soup, soup.body) - return soup + def print_version(self, url): + return self.decode_feedportal_url(url) + '&Template=printpicart' diff --git a/recipes/presseurop.recipe b/recipes/presseurop.recipe index ea06eb0c32..dacabcf43f 100644 --- a/recipes/presseurop.recipe +++ b/recipes/presseurop.recipe @@ -17,6 +17,7 @@ class presseurop(BasicNewsRecipe): oldest_article = 7 max_articles_per_feed = 100 auto_cleanup = True + remove_empty_feeds = True feeds = [ (u'Polityka', u'http://www.presseurop.eu/pl/taxonomy/term/1/%2A/feed'), diff --git a/recipes/stopklatka.recipe b/recipes/stopklatka.recipe index 1f629b1225..70198e497b 100644 --- a/recipes/stopklatka.recipe +++ b/recipes/stopklatka.recipe @@ -8,7 +8,7 @@ class Stopklatka_pl(BasicNewsRecipe): category = 'movies' language = 'pl' encoding = 'utf-8' - extra_css = 'img {display: block;} ul {list-style-type: none;} li {display: inline;}' + extra_css = 'img {display: block;} ul {list-style-type: none;}' cover_url = 'http://static1.stopklatka.pl/images/20/19/11501.jpg' use_embedded_content = False oldest_article = 7 @@ -23,7 +23,7 @@ class Stopklatka_pl(BasicNewsRecipe): keep_only_tags = [dict(attrs={'class':'asset-full-content default-asset-publisher show-asset-title'})] remove_tags = [dict(attrs={'class':['metadata-entry metadata-tags', 'print-action', 'asset-flag', 'asset-ratings', 'ad-nav']}), dict(id='contest')] - feeds = [(u'Wiadomo\u015bci', u'http://stopklatka.pl/wiadomosci/-/asset_publisher/Hl7x4Ku4GpZj/rss?p_p_cacheability=cacheLevelPage'), (u'Artyku\u0142y', u'http://stopklatka.pl/artykuly/-/asset_publisher/pKhn5s0IxqSc/rss?p_p_cacheability=cacheLevelPage'), (u'Premiery i zapowiedzi', u'http://stopklatka.pl/premiery-i-zapowiedzi?p_p_id=eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL&p_p_lifecycle=2&p_p_state=normal&p_p_mode=view&p_p_cacheability=cacheLevelPage&p_p_col_id=column-1&p_p_col_pos=1&p_p_col_count=3&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13393201&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13760176&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=15238425&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13470227&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13913324&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=20234402&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13917041&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13905169&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=14253975&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=21586017&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13540662&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=12999052&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=45280408&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=14826890&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13459998&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13070805&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=20209965&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=21741457&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=35577381&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13530138&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13392987%2Cmartwe-zlo&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13760162%2Cuklad-zamkniety&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F15238403%2Cwszyscy-w-naszej-rodzinie&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13470213%2Cdonoma&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13913310%2Ccristiada&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F20234381%2Craj-wiara&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13917027%2Cintruz&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13905155%2Cspring-breakers&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F14253957%2Ckrudowie&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F21586004%2Cswieta-czworca&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13540648%2Ckwartet&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F12999038%2Cimagine&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F45280404%2Cdom-na-kolkach&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F14826876%2Cg-i-joe-odwet&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13459984%2Cnieobliczalni&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13070591%2Csamotny-port-milosc&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F20209952%2Czanim-noc-nas-nie-rozdzieli&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F21741444%2Chemel&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35577377%2Czywie-bielarus-&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13530124%2Cpanaceum'), (u'Recenzje filmowe', u'http://stopklatka.pl/box-office/-/asset_publisher/3yxqotUEiqHJ/rss?p_p_cacheability=cacheLevelPage'), (u'Recenzje', u'http://stopklatka.pl/recenzje/-/asset_publisher/5oZ3s2J3L0tG/rss?p_p_cacheability=cacheLevelPage'), (u'Gwiazdy', u'http://stopklatka.pl/czerwony-dywan/-/asset_publisher/PqN7MDEGWGvh/rss?p_p_cacheability=cacheLevelPage'), (u'Wywiady Stopklatki', u'http://stopklatka.pl/wywiady/-/asset_publisher/uVh3OrZCaLd7/rss?p_p_cacheability=cacheLevelPage'), (u'Prosto z Hollywood', u'http://stopklatka.pl/wywiady-z-hollywood/-/asset_publisher/YsbU0JSoxb9G/rss?p_p_cacheability=cacheLevelPage'), (u'Plotki', u'http://stopklatka.pl/czerwony-dywan/-/asset_publisher/XuF8EGAkVeTa/rss?p_p_cacheability=cacheLevelPage'), (u'Box Office Polska', u'http://stopklatka.pl/box-office?p_p_id=eventsearch_WAR_eventsearchportlet_INSTANCE_Gqb98cI5dgSJ&p_p_lifecycle=2&p_p_state=normal&p_p_mode=view&p_p_cacheability=cacheLevelPage&p_p_col_id=column-1&p_p_col_pos=1&p_p_col_count=5&_eventsearch_WAR_eventsearchportlet_INSTANCE_Gqb98cI5dgSJ_assetEntryIds=47982267&_eventsearch_WAR_eventsearchportlet_INSTANCE_Gqb98cI5dgSJ_assetEntryIds=46685247&_eventsearch_WAR_eventsearchportlet_INSTANCE_Gqb98cI5dgSJ_assetEntryIds=45280313&_eventsearch_WAR_eventsearchportlet_INSTANCE_Gqb98cI5dgSJ_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47982263%2Cbox-office-weekendowy-polska-15-03-2013-17-03-2013&_eventsearch_WAR_eventsearchportlet_INSTANCE_Gqb98cI5dgSJ_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F46685243%2Cbox-office-weekendowy-polska-08-03-2013-10-03-2013&_eventsearch_WAR_eventsearchportlet_INSTANCE_Gqb98cI5dgSJ_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F45280309%2Cbox-office-weekendowy-polska-01-03-2013-03-03-2013'), (u'Box Office USA', u'http://stopklatka.pl/box-office?p_p_id=eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd&p_p_lifecycle=2&p_p_state=normal&p_p_mode=view&p_p_cacheability=cacheLevelPage&p_p_col_id=column-1&p_p_col_pos=2&p_p_col_count=5&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_assetEntryIds=49047234&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_assetEntryIds=48879358&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_assetEntryIds=47605057&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_assetEntryIds=47809980&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_assetEntryIds=46505246&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F49047230%2Cbox-office-weekendowy-stany-zjednoczone-22-03-2013-24-03-2013&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F48879354%2Cbox-office-weekendowy-stany-zjednoczone-22-03-2013-24-03-2013-estymacja-&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47605053%2Cbox-office-weekendowy-stany-zjednoczone-15-03-2013-17-03-2013-estymacja-&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47809976%2Cbox-office-weekendowy-stany-zjednoczone-15-03-2013-17-03-2013&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F46505242%2Cbox-office-weekendowy-stany-zjednoczone-08-03-2013-10-03-2013'), (u'Relacje', u'http://stopklatka.pl/czerwony-dywan/-/asset_publisher/IkgAkSFxLWV2/rss?p_p_cacheability=cacheLevelPage'), (u'Kalendarium imprez', u'http://stopklatka.pl/kalendarium-imprez?p_p_id=eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh&p_p_lifecycle=2&p_p_state=normal&p_p_mode=view&p_p_cacheability=cacheLevelPage&p_p_col_id=column-1&p_p_col_pos=1&p_p_col_count=3&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47628974&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47627805&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=45317244&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=48884855&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47629292&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=48884742&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35482058&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47627893&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35482076&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47627838&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=48167620&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35482067&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47811744&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35482049&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47629615&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=45088670&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47628531&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35481950&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35481496&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35482022&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=19323743&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47628034&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47628064&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=45088819&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35482031&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35481415&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35481977&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=19323617&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35481932&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35481995&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47628970%2C4-festiwal-filmow-swiata-trzy-zywioly&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47627801%2Cwielka-podroz-krudow&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F45317240%2C6-przeglad-kina-rosyjskiego-nowe-kino-rosyjskie-&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F48884851%2C2-1-nowy-cykl-spotkan-literatury-z-filmem&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47629288%2C5-festiwal-polskich-filmow-krotkometrazowych-short-waves&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F48884738%2Cmoico-enjoy-movies-przeglad-filmow-klasy-b-we-wroclawiu&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35482054%2C1-ogolnopolski-festiwal-polskiej-animacji-o-pla-2013-&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47627889%2Cviii-festiwal-filmow-afrykanskich-afrykamera-2013&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35482072%2C6-miedzynarodowy-festwial-kina-niezaleznego-off-plus-camera&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47627834%2C11-przeglad-filmow-studenckich-z-lodzkiej-filmowki-lodzia-po-wisle-&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F48167616%2Cweze-2013&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35482063%2Cxiv-festiwal-kina-amatorskiego-i-niezaleznego-kan&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47811740%2Cv-festiwal-muzyki-filmowej-krzysztofa-komedy&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35482045%2Ckonkurs-scenariuszowy-script-pro-2013&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47629611%2C9-miedzynarodowy-festiwal-filmowy-%E2%80%9Ezydowskie-motywy%E2%80%9D&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F45088666%2C15-przeglad-filmowy-cieszyn-kino-na-granicy-&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47628527%2Cdzien-filmowca-filmmaker-s-day&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35481946%2C10-planete-doc-film-festival&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35481492%2C66-miedzynarodowy-festiwal-filmowy-w-cannes&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35482018%2C16-festiwal-filmow-kultowych&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F19323739%2C53-krakowski-festiwal-filmowy&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47628030%2C4-festiwal-filmow-mlodziezowych-18&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47628060%2Cvii-superorbitalny-festiwal-filmow-amatorskich-soffa&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F45088815%2Cxv-miedzynarodowy-festiwal-filmow-przyrodniczych-im-wlodzimierza-puchalskiego&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35482027%2C32-koszalinski-festiwal-debiutow-filmowych-mlodzi-i-film-&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35481411%2C6-miedzynarodowy-festiwal-filmow-animowanych-animator-&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35481973%2C13-miedzynarodowy-festiwal-filmowy-sopot-film-festival&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F19323613%2C13-miedzynarodowy-festiwal-filmowy-t-mobile-nowe-horyzonty&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35481928%2C7-festiwal-filmu-i-sztuki-dwa-brzegi-w-kazimierzu-dolnym-i-janowcu&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35481991%2C3-miedzynarodowy-festiwal-filmu-i-muzyki-transatlantyk'), (u'Konkursy', u'http://stopklatka.pl/konkursy?p_p_id=eventsearch_WAR_eventsearchportlet_INSTANCE_Yks1FKgVbrOA&p_p_lifecycle=2&p_p_state=normal&p_p_mode=view&p_p_cacheability=cacheLevelPage&p_p_col_id=column-1&p_p_col_pos=3&p_p_col_count=5&_eventsearch_WAR_eventsearchportlet_INSTANCE_Yks1FKgVbrOA_assetEntryIds=47091950&_eventsearch_WAR_eventsearchportlet_INSTANCE_Yks1FKgVbrOA_assetEntryIds=48879762&_eventsearch_WAR_eventsearchportlet_INSTANCE_Yks1FKgVbrOA_assetEntryIds=48880109&_eventsearch_WAR_eventsearchportlet_INSTANCE_Yks1FKgVbrOA_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47091941%2Cksiazki-dwie-kobiety-&_eventsearch_WAR_eventsearchportlet_INSTANCE_Yks1FKgVbrOA_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F48879753%2Cdvd-rozmowy-noca-&_eventsearch_WAR_eventsearchportlet_INSTANCE_Yks1FKgVbrOA_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F48880000%2Cdvd-milosc-'), (u'Komiks Stopklatki', u'http://stopklatka.pl/komiks/-/asset_publisher/pKhn5s0IxqSc/rss?p_p_cacheability=cacheLevelPage')] + feeds = [(u'Wiadomo\u015bci', u'http://stopklatka.pl/wiadomosci/-/asset_publisher/Hl7x4Ku4GpZj/rss?p_p_cacheability=cacheLevelPage'), (u'Artyku\u0142y', u'http://stopklatka.pl/artykuly/-/asset_publisher/pKhn5s0IxqSc/rss?p_p_cacheability=cacheLevelPage'), (u'Premiery i zapowiedzi', u'http://stopklatka.pl/premiery-i-zapowiedzi?p_p_id=eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL&p_p_lifecycle=2&p_p_state=normal&p_p_mode=view&p_p_cacheability=cacheLevelPage&p_p_col_id=column-1&p_p_col_pos=1&p_p_col_count=3&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13393201&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13760176&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=15238425&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13470227&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13913324&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=20234402&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13917041&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13905169&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=14253975&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=21586017&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13540662&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=12999052&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=45280408&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=14826890&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13459998&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13070805&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=20209965&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=21741457&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=35577381&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13530138&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13392987%2Cmartwe-zlo&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13760162%2Cuklad-zamkniety&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F15238403%2Cwszyscy-w-naszej-rodzinie&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13470213%2Cdonoma&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13913310%2Ccristiada&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F20234381%2Craj-wiara&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13917027%2Cintruz&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13905155%2Cspring-breakers&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F14253957%2Ckrudowie&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F21586004%2Cswieta-czworca&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13540648%2Ckwartet&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F12999038%2Cimagine&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F45280404%2Cdom-na-kolkach&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F14826876%2Cg-i-joe-odwet&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13459984%2Cnieobliczalni&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13070591%2Csamotny-port-milosc&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F20209952%2Czanim-noc-nas-nie-rozdzieli&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F21741444%2Chemel&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35577377%2Czywie-bielarus-&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13530124%2Cpanaceum'), (u'Recenzje filmowe', u'http://stopklatka.pl/box-office/-/asset_publisher/3yxqotUEiqHJ/rss?p_p_cacheability=cacheLevelPage'), (u'Recenzje', u'http://stopklatka.pl/recenzje/-/asset_publisher/5oZ3s2J3L0tG/rss?p_p_cacheability=cacheLevelPage'), (u'Gwiazdy', u'http://stopklatka.pl/czerwony-dywan/-/asset_publisher/PqN7MDEGWGvh/rss?p_p_cacheability=cacheLevelPage'), (u'Wywiady Stopklatki', u'http://stopklatka.pl/wywiady/-/asset_publisher/uVh3OrZCaLd7/rss?p_p_cacheability=cacheLevelPage'), (u'Prosto z Hollywood', u'http://stopklatka.pl/wywiady-z-hollywood/-/asset_publisher/YsbU0JSoxb9G/rss?p_p_cacheability=cacheLevelPage'), (u'Plotki', u'http://stopklatka.pl/czerwony-dywan/-/asset_publisher/XuF8EGAkVeTa/rss?p_p_cacheability=cacheLevelPage'), (u'Box Office Polska', u'http://stopklatka.pl/box-office?p_p_id=eventsearch_WAR_eventsearchportlet_INSTANCE_Gqb98cI5dgSJ&p_p_lifecycle=2&p_p_state=normal&p_p_mode=view&p_p_cacheability=cacheLevelPage&p_p_col_id=column-1&p_p_col_pos=1&p_p_col_count=5&_eventsearch_WAR_eventsearchportlet_INSTANCE_Gqb98cI5dgSJ_assetEntryIds=47982267&_eventsearch_WAR_eventsearchportlet_INSTANCE_Gqb98cI5dgSJ_assetEntryIds=46685247&_eventsearch_WAR_eventsearchportlet_INSTANCE_Gqb98cI5dgSJ_assetEntryIds=45280313&_eventsearch_WAR_eventsearchportlet_INSTANCE_Gqb98cI5dgSJ_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47982263%2Cbox-office-weekendowy-polska-15-03-2013-17-03-2013&_eventsearch_WAR_eventsearchportlet_INSTANCE_Gqb98cI5dgSJ_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F46685243%2Cbox-office-weekendowy-polska-08-03-2013-10-03-2013&_eventsearch_WAR_eventsearchportlet_INSTANCE_Gqb98cI5dgSJ_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F45280309%2Cbox-office-weekendowy-polska-01-03-2013-03-03-2013'), (u'Box Office USA', u'http://stopklatka.pl/box-office?p_p_id=eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd&p_p_lifecycle=2&p_p_state=normal&p_p_mode=view&p_p_cacheability=cacheLevelPage&p_p_col_id=column-1&p_p_col_pos=2&p_p_col_count=5&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_assetEntryIds=49047234&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_assetEntryIds=48879358&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_assetEntryIds=47605057&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_assetEntryIds=47809980&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_assetEntryIds=46505246&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F49047230%2Cbox-office-weekendowy-stany-zjednoczone-22-03-2013-24-03-2013&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F48879354%2Cbox-office-weekendowy-stany-zjednoczone-22-03-2013-24-03-2013-estymacja-&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47605053%2Cbox-office-weekendowy-stany-zjednoczone-15-03-2013-17-03-2013-estymacja-&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47809976%2Cbox-office-weekendowy-stany-zjednoczone-15-03-2013-17-03-2013&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F46505242%2Cbox-office-weekendowy-stany-zjednoczone-08-03-2013-10-03-2013'), (u'Kalendarium imprez', u'http://stopklatka.pl/kalendarium-imprez?p_p_id=eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh&p_p_lifecycle=2&p_p_state=normal&p_p_mode=view&p_p_cacheability=cacheLevelPage&p_p_col_id=column-1&p_p_col_pos=1&p_p_col_count=3&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47628974&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47627805&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=45317244&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=48884855&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47629292&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=48884742&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35482058&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47627893&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35482076&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47627838&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=48167620&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35482067&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47811744&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35482049&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47629615&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=45088670&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47628531&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35481950&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35481496&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35482022&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=19323743&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47628034&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47628064&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=45088819&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35482031&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35481415&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35481977&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=19323617&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35481932&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35481995&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47628970%2C4-festiwal-filmow-swiata-trzy-zywioly&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47627801%2Cwielka-podroz-krudow&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F45317240%2C6-przeglad-kina-rosyjskiego-nowe-kino-rosyjskie-&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F48884851%2C2-1-nowy-cykl-spotkan-literatury-z-filmem&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47629288%2C5-festiwal-polskich-filmow-krotkometrazowych-short-waves&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F48884738%2Cmoico-enjoy-movies-przeglad-filmow-klasy-b-we-wroclawiu&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35482054%2C1-ogolnopolski-festiwal-polskiej-animacji-o-pla-2013-&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47627889%2Cviii-festiwal-filmow-afrykanskich-afrykamera-2013&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35482072%2C6-miedzynarodowy-festwial-kina-niezaleznego-off-plus-camera&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47627834%2C11-przeglad-filmow-studenckich-z-lodzkiej-filmowki-lodzia-po-wisle-&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F48167616%2Cweze-2013&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35482063%2Cxiv-festiwal-kina-amatorskiego-i-niezaleznego-kan&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47811740%2Cv-festiwal-muzyki-filmowej-krzysztofa-komedy&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35482045%2Ckonkurs-scenariuszowy-script-pro-2013&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47629611%2C9-miedzynarodowy-festiwal-filmowy-%E2%80%9Ezydowskie-motywy%E2%80%9D&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F45088666%2C15-przeglad-filmowy-cieszyn-kino-na-granicy-&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47628527%2Cdzien-filmowca-filmmaker-s-day&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35481946%2C10-planete-doc-film-festival&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35481492%2C66-miedzynarodowy-festiwal-filmowy-w-cannes&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35482018%2C16-festiwal-filmow-kultowych&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F19323739%2C53-krakowski-festiwal-filmowy&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47628030%2C4-festiwal-filmow-mlodziezowych-18&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47628060%2Cvii-superorbitalny-festiwal-filmow-amatorskich-soffa&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F45088815%2Cxv-miedzynarodowy-festiwal-filmow-przyrodniczych-im-wlodzimierza-puchalskiego&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35482027%2C32-koszalinski-festiwal-debiutow-filmowych-mlodzi-i-film-&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35481411%2C6-miedzynarodowy-festiwal-filmow-animowanych-animator-&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35481973%2C13-miedzynarodowy-festiwal-filmowy-sopot-film-festival&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F19323613%2C13-miedzynarodowy-festiwal-filmowy-t-mobile-nowe-horyzonty&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35481928%2C7-festiwal-filmu-i-sztuki-dwa-brzegi-w-kazimierzu-dolnym-i-janowcu&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35481991%2C3-miedzynarodowy-festiwal-filmu-i-muzyki-transatlantyk'), (u'Konkursy', u'http://stopklatka.pl/konkursy?p_p_id=eventsearch_WAR_eventsearchportlet_INSTANCE_Yks1FKgVbrOA&p_p_lifecycle=2&p_p_state=normal&p_p_mode=view&p_p_cacheability=cacheLevelPage&p_p_col_id=column-1&p_p_col_pos=3&p_p_col_count=5&_eventsearch_WAR_eventsearchportlet_INSTANCE_Yks1FKgVbrOA_assetEntryIds=47091950&_eventsearch_WAR_eventsearchportlet_INSTANCE_Yks1FKgVbrOA_assetEntryIds=48879762&_eventsearch_WAR_eventsearchportlet_INSTANCE_Yks1FKgVbrOA_assetEntryIds=48880109&_eventsearch_WAR_eventsearchportlet_INSTANCE_Yks1FKgVbrOA_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47091941%2Cksiazki-dwie-kobiety-&_eventsearch_WAR_eventsearchportlet_INSTANCE_Yks1FKgVbrOA_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F48879753%2Cdvd-rozmowy-noca-&_eventsearch_WAR_eventsearchportlet_INSTANCE_Yks1FKgVbrOA_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F48880000%2Cdvd-milosc-'), (u'Komiks Stopklatki', u'http://stopklatka.pl/komiks/-/asset_publisher/pKhn5s0IxqSc/rss?p_p_cacheability=cacheLevelPage')] def append_page(self, soup, appendtag): tag = soup.find('a', attrs={'class': 'next'}) diff --git a/recipes/tablety_pl.recipe b/recipes/tablety_pl.recipe index 827a86180e..04022726f8 100644 --- a/recipes/tablety_pl.recipe +++ b/recipes/tablety_pl.recipe @@ -14,5 +14,5 @@ class Tablety_pl(BasicNewsRecipe): max_articles_per_feed = 100 preprocess_regexps = [(re.compile(ur'

Przeczytaj także.*?

', re.DOTALL), lambda match: ''), (re.compile(ur'

Przeczytaj koniecznie.*?

', re.DOTALL), lambda match: '')] keep_only_tags = [dict(id='news_block')] - remove_tags=[dict(attrs={'class':['comments_icon', 'wp-polls', 'entry-comments', 'wp-polls-loading', 'ts-fab-wrapper', 'entry-footer']})] + remove_tags=[dict(attrs={'class':['comments_icon', 'wp-polls', 'entry-comments', 'wp-polls-loading', 'ts-fab-wrapper', 'entry-footer', 'social-custom']})] feeds = [(u'Najnowsze posty', u'http://www.tablety.pl/feed/')] \ No newline at end of file