import re

from calibre.web.feeds.news import BasicNewsRecipe


class NaTemat(BasicNewsRecipe):
    """Calibre news recipe for the Polish site NaTemat.pl.

    The recipe is purely declarative: it only sets class attributes that
    the BasicNewsRecipe base class reads (feed list, article limits and
    HTML clean-up rules); it defines no methods of its own.
    """

    # --- Publication metadata -------------------------------------------
    title = u'NaTemat.pl'
    __author__ = 'fenuks'
    description = u'informacje, komentarze, opinie'
    category = 'news'
    language = 'pl'
    cover_url = 'http://blog.plona.pl/wp-content/uploads/2012/05/natemat.png'

    # --- Download limits ------------------------------------------------
    oldest_article = 7
    max_articles_per_feed = 100

    # --- HTML clean-up --------------------------------------------------
    no_stylesheets = True

    # (compiled pattern, replacement callable) pairs; every match is
    # replaced with the empty string. The labels are the Polish "read
    # also" / "see also" / "read more" lead-ins followed by a colon.
    # NOTE(review): each pattern ends in a lazy `.*?` with nothing after
    # it, which matches the empty string, so only the label itself (up to
    # and including the colon) is removed -- confirm this is intended.
    preprocess_regexps = [
        (re.compile(u'Czytaj też\\:.*?', re.IGNORECASE), lambda m: ''),
        (re.compile(u'Zobacz też\\:.*?', re.IGNORECASE), lambda m: ''),
        (re.compile(u'Czytaj więcej\\:.*?', re.IGNORECASE), lambda m: ''),
        (re.compile(u'Czytaj również\\:.*?', re.IGNORECASE), lambda m: ''),
    ]

    # Tag selector for the article container (element with id="main").
    keep_only_tags = [dict(id='main')]

    # Tag selectors for page furniture to drop: elements carrying any of
    # the listed class values, plus <img> tags with class "indent".
    remove_tags = [
        dict(
            attrs={
                'class': [
                    'button',
                    'block-inside style_default',
                    'article-related',
                    'user-header',
                    'links',
                ]
            }
        ),
        dict(name='img', attrs={'class': 'indent'}),
    ]

    # --- Feeds: (display title, RSS URL) --------------------------------
    feeds = [(u'Artyku\u0142y', u'http://natemat.pl/rss/wszystkie')]