calibre/recipes/satkurier.recipe
Kovid Goyal 29cd8d64ea
Change shebangs to python from python2
Also remove a few other miscellaneous references to python2
2020-08-22 18:47:51 +05:30

42 lines
1.4 KiB
Python

#!/usr/bin/env python
__license__ = 'GPL v3'
from calibre.web.feeds.news import BasicNewsRecipe
class SATKurier(BasicNewsRecipe):
title = u'SATKurier.pl'
__author__ = 'Artur Stachecki <artur.stachecki@gmail.com>'
language = 'pl'
description = u'Serwis poświęcony telewizji cyfrowej'
oldest_article = 7
masthead_url = 'http://satkurier.pl/img/header_sk_logo.gif'
max_articles_per_feed = 100
simultaneous_downloads = 5
remove_javascript = True
no_stylesheets = True
keep_only_tags = [dict(name='div', attrs={'id': ['leftNewsContainer', 'content']})]
remove_tags = [dict(name='div', attrs={'class': ['col-xs-20', 'coverNews','btn-group']})]
remove_tags_after = [dict(name='div',attrs={'class':'btn-group'})]
feeds = [(u'Najnowsze wiadomości', u'http://feeds.feedburner.com/satkurierpl?format=xml'),
(u'Sport w telewizji',
u'http://feeds.feedburner.com/satkurier/sport?format=xml'),
(u'Blog', u'http://feeds.feedburner.com/satkurier/blog?format=xml')]
def preprocess_html(self, soup):
image = soup.find(attrs={'id': 'news_mini_photo'})
if image:
image.extract()
header = soup.find('h1')
header.replaceWith(header.prettify() + image.prettify())
for alink in soup.findAll('a'):
if alink.string is not None:
tstr = alink.string
alink.replaceWith(tstr)
return soup