calibre/recipes/telepolis_pl.recipe
Kovid Goyal 567040ee1e Perform PEP8 compliance checks on the entire codebase
Some bits of PEP 8 are turned off via setup.cfg
2016-07-29 21:25:17 +05:30

47 lines
1.5 KiB
Python

#!/usr/bin/env python2
__license__ = 'GPL v3'
from calibre.web.feeds.news import BasicNewsRecipe
class telepolis(BasicNewsRecipe):
title = u'Telepolis.pl'
__author__ = 'Artur Stachecki <artur.stachecki@gmail.com>, Tomasz Długosz <tomek3d@gmail.com>'
language = 'pl'
description = u'Twój telekomunikacyjny serwis informacyjny.'
masthead_url = 'http://telepolis.pl/i/telepolis-logo2.gif'
no_stylesheets = True
use_embedded_content = False
feeds = [
(u'Wiadomości', u'http://www.telepolis.pl/rss,2,5,0.html')
]
keep_only_tags = [
dict(name='div', attrs={'class': 'flol w510'}),
dict(name='div', attrs={'class': 'main_tresc'}),
dict(name='div', attrs={'class': 'main_tresc_news'})
]
def append_page(self, soup, appendtag):
chpage = appendtag.find(attrs={'class': 'str'})
if chpage:
for page in chpage.findAll('a'):
if page.renderContents() == 'Następna &rsaquo;':
break
soup2 = self.index_to_soup(page['href'])
pagetext = soup2.find(attrs={'class': 'main_tresc'})
pos = len(appendtag.contents)
appendtag.insert(pos, pagetext)
for r in appendtag.findAll(attrs={'class': 'str'}):
r.extract()
def preprocess_html(self, soup):
self.append_page(soup, soup.body)
for image in soup.findAll('img'):
if 'm.jpg' in image['src']:
image['src'] = image['src'].replace('m.jpg', '.jpg')
return soup