mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
fix telepolis_pl and improve swiatkindle
This commit is contained in:
parent
a98c808a47
commit
41b9342a2a
@ -19,6 +19,7 @@ class swiatczytnikow(BasicNewsRecipe):
|
|||||||
|
|
||||||
feeds = [(u'Świat Czytników - wpisy', u'http://swiatczytnikow.pl/feed')]
|
feeds = [(u'Świat Czytników - wpisy', u'http://swiatczytnikow.pl/feed')]
|
||||||
|
|
||||||
remove_tags = [dict(name = 'ul', attrs = {'class' : 'similar-posts'})]
|
remove_tags = [dict(name = 'ul', attrs = {'class' : 'similar-posts'}),
|
||||||
|
dict(name = 'div', attrs = {'class' : 'feedflare'})]
|
||||||
|
|
||||||
preprocess_regexps = [(re.compile(u'<h3>Czytaj dalej:</h3>'), lambda match: '')]
|
preprocess_regexps = [(re.compile(u'<h3>Czytaj dalej:</h3>'), lambda match: '')]
|
||||||
|
@ -16,11 +16,31 @@ class telepolis(BasicNewsRecipe):
|
|||||||
use_embedded_content = False
|
use_embedded_content = False
|
||||||
|
|
||||||
feeds = [
|
feeds = [
|
||||||
(u'Wiadomości', u'http://www.telepolis.pl/rss/news.php')#,
|
(u'Wiadomości', u'http://www.telepolis.pl/rss,2,5,0.html')
|
||||||
#(u'Artykuły', u'http://www.telepolis.pl/rss/artykuly.php')
|
|
||||||
]
|
]
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(name='div', attrs={'class':'flol w510'}),
|
dict(name='div', attrs={'class':'flol w510'}),
|
||||||
|
dict(name='div', attrs={'class':'main_tresc'}),
|
||||||
dict(name='div', attrs={'class':'main_tresc_news'})
|
dict(name='div', attrs={'class':'main_tresc_news'})
|
||||||
]
|
]
|
||||||
|
|
||||||
|
def append_page(self, soup, appendtag):
    """Append the remaining pages of a multi-page article to ``appendtag``.

    Looks for the pager element (class ``str``) inside ``appendtag``. Every
    link in it that precedes the "next page" link is fetched with
    ``self.index_to_soup`` and its ``main_tresc`` element is inserted at the
    end of ``appendtag``. All pager elements are removed afterwards.

    ``soup`` is accepted for interface compatibility and is not used here.
    """
    pager = appendtag.find(attrs={'class': 'str'})
    if not pager:
        return

    for link in pager.findAll('a'):
        label = link.renderContents()
        # renderContents() yields an encoded byte string; decode it so the
        # comparison with the text label also works where bytes != str.
        if isinstance(label, bytes):
            label = label.decode('utf-8', 'replace')
        # The "next page" link ends the list of numbered page links.
        if label == u'Następna ›':
            break
        page_soup = self.index_to_soup(link['href'])
        page_text = page_soup.find(attrs={'class': 'main_tresc'})
        if page_text is None:
            # The fetched page has no article body; nothing to append.
            continue
        appendtag.insert(len(appendtag.contents), page_text)

    # Drop the pager itself (and any copies pulled in with appended pages).
    for pager_tag in appendtag.findAll(attrs={'class': 'str'}):
        pager_tag.extract()
|
||||||
|
|
||||||
|
def preprocess_html(self, soup):
    """Merge follow-up pages into the article and rewrite image URLs.

    Calls ``self.append_page`` on the document body, then rewrites every
    ``<img>`` whose ``src`` contains ``m.jpg`` so that ``m.jpg`` becomes
    ``.jpg`` (presumably swapping a thumbnail for the full-size image --
    confirm against the site). Returns the same ``soup`` object.
    """
    self.append_page(soup, soup.body)
    for image in soup.findAll('img'):
        # An <img> without a src attribute must not abort preprocessing.
        src = image.get('src')
        if src and 'm.jpg' in src:
            image['src'] = src.replace('m.jpg', '.jpg')
    return soup
|
||||||
|
Loading…
x
Reference in New Issue
Block a user