diff --git a/recipes/forsal.recipe b/recipes/forsal.recipe index 4ebbb5a2a8..22a1ddcda5 100644 --- a/recipes/forsal.recipe +++ b/recipes/forsal.recipe @@ -9,6 +9,7 @@ class ForsalPL(BasicNewsRecipe): oldest_article = 7 max_articles_per_feed = 100 use_embedded_content = False + remove_empty_feeds = True ignore_duplicate_articles = {'title', 'url'} cover_url = 'http://www.bizneswnieruchomosciach.pl/wp-content/uploads/2010/07/logo_forsal.jpg' no_stylesheets = True diff --git a/recipes/kosmonauta_pl.recipe b/recipes/kosmonauta_pl.recipe index d943739832..e93853bd57 100644 --- a/recipes/kosmonauta_pl.recipe +++ b/recipes/kosmonauta_pl.recipe @@ -8,7 +8,7 @@ class Kosmonauta(BasicNewsRecipe): category = 'astronomy' language = 'pl' cover_url = 'http://bi.gazeta.pl/im/4/10393/z10393414X,Kosmonauta-net.jpg' - extra_css = '.thumbnail {float:left;margin-right:5px;}' + extra_css = '.thumb-left {float:left; margin-right:5px;} .calibre_navbar {clear: both;}' no_stylesheets = True INDEX = 'http://www.kosmonauta.net' oldest_article = 7 diff --git a/recipes/national_geographic_pl.recipe b/recipes/national_geographic_pl.recipe index 07fc0da666..be02ce4ebd 100644 --- a/recipes/national_geographic_pl.recipe +++ b/recipes/national_geographic_pl.recipe @@ -10,8 +10,8 @@ class recipeMagic(BasicNewsRecipe): title = 'National Geographic PL' __author__ = 'Marcin Urban 2011' __modified_by__ = 'fenuks' - description = 'legenda wśród magazynów z historią sięgającą 120 lat' - #cover_url = 'http://www.guj.pl/var/guj/storage/images/media/nasze_magazyny/national_geographic/logo/ng_logo/2606-1-pol-PL/ng_logo.jpg' + description = 'Legenda wśród magazynów z historią sięgającą 120 lat' + #cover_url = 'http://www.guj.pl/var/guj/storage/images/media/nasze_magazyny/national_geographic/logo/ng_logo/2606-1-pol-PL/ng_logo.jpg' oldest_article = 7 max_articles_per_feed = 100 no_stylesheets = True @@ -21,13 +21,14 @@ class recipeMagic(BasicNewsRecipe): publisher = 'G+J Gruner+Jahr Polska' category = 'news, PL,' language = 'pl' + remove_empty_feeds = True publication_type = 'newsportal' extra_css = ''' body {font-family: verdana, arial, helvetica, geneva, sans-serif ;} - h1{text-align: center;} - h2{font-size: medium; font-weight: bold;} - .authordate {font-size: small; color: #696969;} - p.lead {font-weight: bold; text-align: center;} - .fot{font-size: x-small; color: #666666;} ''' + h1{text-align: center;} + h2{font-size: medium; font-weight: bold;} + .authordate {font-size: small; color: #696969;} + p.lead {font-weight: bold; text-align: center;} + .fot{font-size: x-small; color: #666666;} ''' preprocess_regexps = [(re.compile(r'', re.DOTALL), lambda m: '')] conversion_options = { 'comments' : description @@ -37,9 +38,9 @@ class recipeMagic(BasicNewsRecipe): ,'linearize_tables': True } - remove_tags = [ - dict(name='div', attrs={'class':'add_inf'}), - dict(name='div', attrs={'class':'add_f'}), + remove_tags = [ + dict(name='div', attrs={'class':'add_inf'}), + dict(name='div', attrs={'class':'add_f'}), ] remove_attributes = ['width','height'] @@ -47,14 +48,16 @@ class recipeMagic(BasicNewsRecipe): def find_articles(self, url): articles = [] - soup=self.index_to_soup(url) - tag=soup.find(attrs={'class':'arl'}) - art=tag.ul.findAll('li') + soup = self.index_to_soup(url) + tag = soup.find(attrs={'class':'arl'}) + if not tag: + return articles + art = tag.ul.findAll('li') for i in art: - title=i.a['title'] - url=i.a['href'] + title = i.a['title'] + url = i.a['href'] #date=soup.find(id='footer').ul.li.string[41:-1] - desc=i.div.p.string + desc = i.div.p.string articles.append({'title' : title, 'url' : url, 'date' : '', diff --git a/recipes/polter_pl.recipe b/recipes/polter_pl.recipe index aea21dca9c..fdebcbc269 100644 --- a/recipes/polter_pl.recipe +++ b/recipes/polter_pl.recipe @@ -2,7 +2,7 @@ __license__ = 'GPL v3' import re from calibre.web.feeds.news import BasicNewsRecipe -class Poltergeist(BasicNewsRecipe): +class Polter(BasicNewsRecipe): title = u'Polter.pl' __author__ = 'fenuks' description = u'Największy polski serwis poświęcony ogólno pojętej fantastyce - grom fabularnym (RPG), książkom, filmowi, komiksowi, grom planszowym, karcianym i bitewnym.' @@ -10,21 +10,26 @@ class Poltergeist(BasicNewsRecipe): #publication_type = '' language = 'pl' #encoding = '' - extra_css = '.image, .floatright {float: right; margin-left: 10px;} .floatleft {float: left; margin-right: 10px;}' + extra_css = '.image, .floatright {float: right; margin-left: 10px;} .floatleft {float: left; margin-right: 10px;} .calibre_navbar {clear: both;} .p_title {font-weight: bold;} .p_image {margin-left: auto; margin-right: auto; display: block;} .italic {font-style: italic;}' cover_url = 'http://static.polter.pl/sub/promo/bpromo2524.jpg' #masthead_url = '' use_embedded_content = False oldest_article = 7 - preprocess_regexps = [(re.compile(ur']*?id="pol_lista"[^>]*?>.*', re.DOTALL|re.IGNORECASE), lambda match: ''), (re.compile(ur']*?>wersja do druku', re.DOTALL|re.IGNORECASE), lambda match: '')] + preprocess_regexps = [(re.compile(ur']*?id="pol_lista"[^>]*?>.*', re.DOTALL|re.IGNORECASE), lambda match: ''), + (re.compile(ur']*?>wersja do druku', re.DOTALL|re.IGNORECASE), lambda match: ''), + #(re.compile(ur']*">(]*?/>)', re.DOTALL|re.IGNORECASE), lambda match: '/1') + (re.compile(ur'(
[\n\s\r]*){2,}', re.DOTALL|re.IGNORECASE), lambda match: '
'), + (re.compile(ur']*>Zaloguj się aby wyłączyć tę reklamę', re.DOTALL|re.IGNORECASE), lambda match: ''), + ] max_articles_per_feed = 100 no_stylesheets = True remove_empty_feeds = True remove_javascript = True - remove_attributes = ['style', 'font'] + remove_attributes = ['font', 'fieldset', 'onclick'] ignore_duplicate_articles = {'title', 'url'} keep_only_tags = [dict(attrs={'class':'boxcontent'})] - remove_tags = [dict(attrs={'class':'fb-like'}), dict(attrs={'alt':'Wersja do druku'}), dict(id='pol_liczba'), dict(attrs={'scr':'http://static.polter.pl/tplimg/buttons/ceneo_140_40.gif'})] + remove_tags = [dict(attrs={'class':'fb-like'}), dict(attrs={'alt':'Wersja do druku'}), dict(id=['pol_liczba', 'col12AdSenseLight']), dict(attrs={'scr':'http://static.polter.pl/tplimg/buttons/ceneo_140_40.gif'}), dict(name=['g:plusone', 'fb:like'])] remove_tags_after = dict(attrs={'class':'fb-like'}) #remove_tags_before = dict() @@ -35,9 +40,24 @@ class Poltergeist(BasicNewsRecipe): s['class'] = 'floatleft' for s in soup.findAll(attrs={'style':re.compile('float: ?right')}): s['class'] = 'floatright' + for s in soup.findAll(style=True): + if 'bold;' in s['style']: + if s.get('class', ''): + s['class'] = s['class'] + ' p_title' + else: + s['class'] = 'p_title' + if 'italic;' in s['style']: + if s.get('class', ''): + s['class'] = s['class'] + ' italic' + else: + s['class'] = 'italic' + del s['style'] + tag = soup.find(id='twoja_ocena') if tag: tag.parent.extract() for tag in soup.findAll(id='lista_chce_ile'): tag.parent.parent.extract() + for r in soup.findAll(name='a', href=re.compile(r'^http://www.ceneo.pl/')): + r.extract() return soup \ No newline at end of file diff --git a/recipes/stopklatka.recipe b/recipes/stopklatka.recipe index 28c92a2453..1f629b1225 100644 --- a/recipes/stopklatka.recipe +++ b/recipes/stopklatka.recipe @@ -12,6 +12,7 @@ class Stopklatka_pl(BasicNewsRecipe): cover_url = 'http://static1.stopklatka.pl/images/20/19/11501.jpg' use_embedded_content = False oldest_article = 7 + BASEURL = 'http://stopklatka.pl' max_articles_per_feed = 100 no_stylesheets = True remove_empty_feeds = True @@ -20,9 +21,8 @@ class Stopklatka_pl(BasicNewsRecipe): ignore_duplicate_articles = {'title', 'url'} keep_only_tags = [dict(attrs={'class':'asset-full-content default-asset-publisher show-asset-title'})] - remove_tags = [dict(attrs={'class':['metadata-entry metadata-tags', 'print-action', 'asset-flag', 'asset-ratings']}), dict(id='contest')] - #remove_tags_after = dict() - #remove_tags_before = dict() + remove_tags = [dict(attrs={'class':['metadata-entry metadata-tags', 'print-action', 'asset-flag', 'asset-ratings', 'ad-nav']}), dict(id='contest')] + feeds = [(u'Wiadomo\u015bci', u'http://stopklatka.pl/wiadomosci/-/asset_publisher/Hl7x4Ku4GpZj/rss?p_p_cacheability=cacheLevelPage'), (u'Artyku\u0142y', u'http://stopklatka.pl/artykuly/-/asset_publisher/pKhn5s0IxqSc/rss?p_p_cacheability=cacheLevelPage'), (u'Premiery i zapowiedzi', u'http://stopklatka.pl/premiery-i-zapowiedzi?p_p_id=eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL&p_p_lifecycle=2&p_p_state=normal&p_p_mode=view&p_p_cacheability=cacheLevelPage&p_p_col_id=column-1&p_p_col_pos=1&p_p_col_count=3&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13393201&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13760176&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=15238425&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13470227&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13913324&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=20234402&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13917041&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13905169&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=14253975&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=21586017&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13540662&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=12999052&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=45280408&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=14826890&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13459998&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13070805&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=20209965&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=21741457&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=35577381&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_assetEntryIds=13530138&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13392987%2Cmartwe-zlo&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13760162%2Cuklad-zamkniety&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F15238403%2Cwszyscy-w-naszej-rodzinie&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13470213%2Cdonoma&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13913310%2Ccristiada&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F20234381%2Craj-wiara&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13917027%2Cintruz&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13905155%2Cspring-breakers&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F14253957%2Ckrudowie&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F21586004%2Cswieta-czworca&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13540648%2Ckwartet&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F12999038%2Cimagine&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F45280404%2Cdom-na-kolkach&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F14826876%2Cg-i-joe-odwet&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13459984%2Cnieobliczalni&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13070591%2Csamotny-port-milosc&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F20209952%2Czanim-noc-nas-nie-rozdzieli&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F21741444%2Chemel&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35577377%2Czywie-bielarus-&_eventsearch_WAR_eventsearchportlet_INSTANCE_FLRWmpE7H8IL_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F13530124%2Cpanaceum'), (u'Recenzje filmowe', u'http://stopklatka.pl/box-office/-/asset_publisher/3yxqotUEiqHJ/rss?p_p_cacheability=cacheLevelPage'), (u'Recenzje', u'http://stopklatka.pl/recenzje/-/asset_publisher/5oZ3s2J3L0tG/rss?p_p_cacheability=cacheLevelPage'), (u'Gwiazdy', u'http://stopklatka.pl/czerwony-dywan/-/asset_publisher/PqN7MDEGWGvh/rss?p_p_cacheability=cacheLevelPage'), (u'Wywiady Stopklatki', u'http://stopklatka.pl/wywiady/-/asset_publisher/uVh3OrZCaLd7/rss?p_p_cacheability=cacheLevelPage'), (u'Prosto z Hollywood', u'http://stopklatka.pl/wywiady-z-hollywood/-/asset_publisher/YsbU0JSoxb9G/rss?p_p_cacheability=cacheLevelPage'), (u'Plotki', u'http://stopklatka.pl/czerwony-dywan/-/asset_publisher/XuF8EGAkVeTa/rss?p_p_cacheability=cacheLevelPage'), (u'Box Office Polska', u'http://stopklatka.pl/box-office?p_p_id=eventsearch_WAR_eventsearchportlet_INSTANCE_Gqb98cI5dgSJ&p_p_lifecycle=2&p_p_state=normal&p_p_mode=view&p_p_cacheability=cacheLevelPage&p_p_col_id=column-1&p_p_col_pos=1&p_p_col_count=5&_eventsearch_WAR_eventsearchportlet_INSTANCE_Gqb98cI5dgSJ_assetEntryIds=47982267&_eventsearch_WAR_eventsearchportlet_INSTANCE_Gqb98cI5dgSJ_assetEntryIds=46685247&_eventsearch_WAR_eventsearchportlet_INSTANCE_Gqb98cI5dgSJ_assetEntryIds=45280313&_eventsearch_WAR_eventsearchportlet_INSTANCE_Gqb98cI5dgSJ_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47982263%2Cbox-office-weekendowy-polska-15-03-2013-17-03-2013&_eventsearch_WAR_eventsearchportlet_INSTANCE_Gqb98cI5dgSJ_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F46685243%2Cbox-office-weekendowy-polska-08-03-2013-10-03-2013&_eventsearch_WAR_eventsearchportlet_INSTANCE_Gqb98cI5dgSJ_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F45280309%2Cbox-office-weekendowy-polska-01-03-2013-03-03-2013'), (u'Box Office USA', u'http://stopklatka.pl/box-office?p_p_id=eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd&p_p_lifecycle=2&p_p_state=normal&p_p_mode=view&p_p_cacheability=cacheLevelPage&p_p_col_id=column-1&p_p_col_pos=2&p_p_col_count=5&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_assetEntryIds=49047234&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_assetEntryIds=48879358&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_assetEntryIds=47605057&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_assetEntryIds=47809980&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_assetEntryIds=46505246&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F49047230%2Cbox-office-weekendowy-stany-zjednoczone-22-03-2013-24-03-2013&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F48879354%2Cbox-office-weekendowy-stany-zjednoczone-22-03-2013-24-03-2013-estymacja-&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47605053%2Cbox-office-weekendowy-stany-zjednoczone-15-03-2013-17-03-2013-estymacja-&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47809976%2Cbox-office-weekendowy-stany-zjednoczone-15-03-2013-17-03-2013&_eventsearch_WAR_eventsearchportlet_INSTANCE_24AXs0agMxJd_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F46505242%2Cbox-office-weekendowy-stany-zjednoczone-08-03-2013-10-03-2013'), (u'Relacje', u'http://stopklatka.pl/czerwony-dywan/-/asset_publisher/IkgAkSFxLWV2/rss?p_p_cacheability=cacheLevelPage'), (u'Kalendarium imprez', u'http://stopklatka.pl/kalendarium-imprez?p_p_id=eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh&p_p_lifecycle=2&p_p_state=normal&p_p_mode=view&p_p_cacheability=cacheLevelPage&p_p_col_id=column-1&p_p_col_pos=1&p_p_col_count=3&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47628974&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47627805&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=45317244&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=48884855&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47629292&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=48884742&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35482058&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47627893&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35482076&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47627838&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=48167620&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35482067&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47811744&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35482049&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47629615&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=45088670&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47628531&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35481950&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35481496&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35482022&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=19323743&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47628034&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=47628064&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=45088819&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35482031&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35481415&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35481977&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=19323617&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35481932&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_assetEntryIds=35481995&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47628970%2C4-festiwal-filmow-swiata-trzy-zywioly&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47627801%2Cwielka-podroz-krudow&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F45317240%2C6-przeglad-kina-rosyjskiego-nowe-kino-rosyjskie-&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F48884851%2C2-1-nowy-cykl-spotkan-literatury-z-filmem&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47629288%2C5-festiwal-polskich-filmow-krotkometrazowych-short-waves&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F48884738%2Cmoico-enjoy-movies-przeglad-filmow-klasy-b-we-wroclawiu&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35482054%2C1-ogolnopolski-festiwal-polskiej-animacji-o-pla-2013-&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47627889%2Cviii-festiwal-filmow-afrykanskich-afrykamera-2013&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35482072%2C6-miedzynarodowy-festwial-kina-niezaleznego-off-plus-camera&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47627834%2C11-przeglad-filmow-studenckich-z-lodzkiej-filmowki-lodzia-po-wisle-&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F48167616%2Cweze-2013&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35482063%2Cxiv-festiwal-kina-amatorskiego-i-niezaleznego-kan&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47811740%2Cv-festiwal-muzyki-filmowej-krzysztofa-komedy&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35482045%2Ckonkurs-scenariuszowy-script-pro-2013&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47629611%2C9-miedzynarodowy-festiwal-filmowy-%E2%80%9Ezydowskie-motywy%E2%80%9D&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F45088666%2C15-przeglad-filmowy-cieszyn-kino-na-granicy-&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47628527%2Cdzien-filmowca-filmmaker-s-day&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35481946%2C10-planete-doc-film-festival&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35481492%2C66-miedzynarodowy-festiwal-filmowy-w-cannes&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35482018%2C16-festiwal-filmow-kultowych&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F19323739%2C53-krakowski-festiwal-filmowy&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47628030%2C4-festiwal-filmow-mlodziezowych-18&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47628060%2Cvii-superorbitalny-festiwal-filmow-amatorskich-soffa&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F45088815%2Cxv-miedzynarodowy-festiwal-filmow-przyrodniczych-im-wlodzimierza-puchalskiego&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35482027%2C32-koszalinski-festiwal-debiutow-filmowych-mlodzi-i-film-&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35481411%2C6-miedzynarodowy-festiwal-filmow-animowanych-animator-&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35481973%2C13-miedzynarodowy-festiwal-filmowy-sopot-film-festival&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F19323613%2C13-miedzynarodowy-festiwal-filmowy-t-mobile-nowe-horyzonty&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35481928%2C7-festiwal-filmu-i-sztuki-dwa-brzegi-w-kazimierzu-dolnym-i-janowcu&_eventsearch_WAR_eventsearchportlet_INSTANCE_rHUXlm2Y2veh_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F35481991%2C3-miedzynarodowy-festiwal-filmu-i-muzyki-transatlantyk'), (u'Konkursy', u'http://stopklatka.pl/konkursy?p_p_id=eventsearch_WAR_eventsearchportlet_INSTANCE_Yks1FKgVbrOA&p_p_lifecycle=2&p_p_state=normal&p_p_mode=view&p_p_cacheability=cacheLevelPage&p_p_col_id=column-1&p_p_col_pos=3&p_p_col_count=5&_eventsearch_WAR_eventsearchportlet_INSTANCE_Yks1FKgVbrOA_assetEntryIds=47091950&_eventsearch_WAR_eventsearchportlet_INSTANCE_Yks1FKgVbrOA_assetEntryIds=48879762&_eventsearch_WAR_eventsearchportlet_INSTANCE_Yks1FKgVbrOA_assetEntryIds=48880109&_eventsearch_WAR_eventsearchportlet_INSTANCE_Yks1FKgVbrOA_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F47091941%2Cksiazki-dwie-kobiety-&_eventsearch_WAR_eventsearchportlet_INSTANCE_Yks1FKgVbrOA_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F48879753%2Cdvd-rozmowy-noca-&_eventsearch_WAR_eventsearchportlet_INSTANCE_Yks1FKgVbrOA_urls=http%3A%2F%2Fstopklatka.pl%2F-%2F48880000%2Cdvd-milosc-'), (u'Komiks Stopklatki', u'http://stopklatka.pl/komiks/-/asset_publisher/pKhn5s0IxqSc/rss?p_p_cacheability=cacheLevelPage')] def append_page(self, soup, appendtag): @@ -30,7 +30,7 @@ class Stopklatka_pl(BasicNewsRecipe): if tag: while tag: url = tag['href'] - soup2 = self.index_to_soup(url) + soup2 = self.index_to_soup(self.BASEURL+url) tag = soup2.find('a', attrs={'class': 'next'}) pagetext = soup2.find(attrs={'class': 'journal-content-article'}) comments = pagetext.findAll(text=lambda text:isinstance(text, Comment)) @@ -40,7 +40,11 @@ class Stopklatka_pl(BasicNewsRecipe): appendtag.insert(pos, pagetext) appendtag.find('a', attrs={'class': 'next'}).extract() - def preprocess_html(self, soup): self.append_page(soup, soup.body) - return soup \ No newline at end of file + return soup + + def get_browser(self): + br = BasicNewsRecipe.get_browser(self) + br.open(self.BASEURL) + return br \ No newline at end of file diff --git a/recipes/tablety_pl.recipe b/recipes/tablety_pl.recipe index c0d8b66c3d..827a86180e 100644 --- a/recipes/tablety_pl.recipe +++ b/recipes/tablety_pl.recipe @@ -14,7 +14,5 @@ class Tablety_pl(BasicNewsRecipe): max_articles_per_feed = 100 preprocess_regexps = [(re.compile(ur'

Przeczytaj także.*?

', re.DOTALL), lambda match: ''), (re.compile(ur'

Przeczytaj koniecznie.*?

', re.DOTALL), lambda match: '')] keep_only_tags = [dict(id='news_block')] - #remove_tags_before=dict(name="h1", attrs={'class':'entry-title'}) - #remove_tags_after=dict(name="footer", attrs={'class':'entry-footer clearfix'}) - remove_tags=[dict(attrs={'class':['comments_icon', 'wp-polls', 'entry-comments']})] + remove_tags=[dict(attrs={'class':['comments_icon', 'wp-polls', 'entry-comments', 'wp-polls-loading', 'ts-fab-wrapper', 'entry-footer']})] feeds = [(u'Najnowsze posty', u'http://www.tablety.pl/feed/')] \ No newline at end of file