diff --git a/recipes/swiatkindle.recipe b/recipes/swiatkindle.recipe
index d8e0e3f403..c589d1b6e1 100644
--- a/recipes/swiatkindle.recipe
+++ b/recipes/swiatkindle.recipe
@@ -19,6 +19,7 @@ class swiatczytnikow(BasicNewsRecipe):
 
     feeds = [(u'Świat Czytników - wpisy', u'http://swiatczytnikow.pl/feed')]
 
-    remove_tags = [dict(name = 'ul', attrs = {'class' : 'similar-posts'})]
+    remove_tags = [dict(name = 'ul', attrs = {'class' : 'similar-posts'}),
+                   dict(name = 'div', attrs = {'class' : 'feedflare'})]
 
     preprocess_regexps = [(re.compile(u'

Czytaj dalej:

'), lambda match: '')]
diff --git a/recipes/telepolis_pl.recipe b/recipes/telepolis_pl.recipe
index 9ea878bc77..1aa7734c2c 100644
--- a/recipes/telepolis_pl.recipe
+++ b/recipes/telepolis_pl.recipe
@@ -16,11 +16,31 @@ class telepolis(BasicNewsRecipe):
     use_embedded_content = False
 
     feeds = [
-        (u'Wiadomości', u'http://www.telepolis.pl/rss/news.php')#,
-        #(u'Artykuły', u'http://www.telepolis.pl/rss/artykuly.php')
+        (u'Wiadomości', u'http://www.telepolis.pl/rss,2,5,0.html')
     ]
 
     keep_only_tags = [
         dict(name='div', attrs={'class':'flol w510'}),
+        dict(name='div', attrs={'class':'main_tresc'}),
         dict(name='div', attrs={'class':'main_tresc_news'})
     ]
+
+    def append_page(self, soup, appendtag):
+        chpage= appendtag.find(attrs={'class':'str'})
+        if chpage:
+            for page in chpage.findAll('a'):
+                if page.renderContents() == 'Następna ›':
+                    break
+                soup2 = self.index_to_soup(page['href'])
+                pagetext = soup2.find(attrs={'class':'main_tresc'})
+                pos = len(appendtag.contents)
+                appendtag.insert(pos, pagetext)
+            for r in appendtag.findAll(attrs={'class':'str'}):
+                r.extract()
+
+    def preprocess_html(self, soup):
+        self.append_page(soup, soup.body)
+        for image in soup.findAll('img'):
+            if 'm.jpg' in image['src']:
+                image['src'] = image['src'].replace('m.jpg', '.jpg')
+        return soup