calibre/recipes/tyden.cz.recipe
2012-11-18 23:53:33 +05:30

45 lines
1.3 KiB
Plaintext

# vim:fileencoding=UTF-8:ts=4:sw=4:sta:et:sts=4:ai
from __future__ import unicode_literals
from calibre.web.feeds.recipes import BasicNewsRecipe
class tydenRecipe(BasicNewsRecipe):
    """calibre news recipe for Tyden.cz.

    Pulls articles from three RSS feeds of www.tyden.cz and skips any
    article URL that has already been accepted, so a story listed in
    more than one feed is only taken once.
    """

    __author__ = 'bubak'
    title = u'Tyden.cz'
    publisher = u''
    description = ''

    # Feed limits (semantics defined by BasicNewsRecipe).
    oldest_article = 1
    max_articles_per_feed = 20

    # (section title, RSS URL) pairs.
    feeds = [
        (u'Domácí', u'http://www.tyden.cz/rss/rss.php?rubrika_id=6'),
        (u'Politika', u'http://www.tyden.cz/rss/rss.php?rubrika_id=173'),
        (u'Kauzy', u'http://www.tyden.cz/rss/rss.php?rubrika_id=340')
    ]

    # NOTE(review): explicit encoding override was left disabled by the
    # original author -- confirm whether the site still needs it.
    # encoding = 'iso-8859-2'
    language = 'cs'
    cover_url = 'http://www.tyden.cz/img/tyden-logo.png'

    # Page clean-up options.
    remove_javascript = True
    no_stylesheets = True
    remove_attributes = []
    # Markers delimiting the article body: the breadcrumbs paragraph at
    # the start and the author paragraph at the end.
    remove_tags_before = dict(name='p', attrs={'id': ['breadcrumbs']})
    remove_tags_after = dict(name='p', attrs={'class': ['author']})

    # URLs accepted so far, used as a membership table (values are
    # always True). Defined on the class, so it is shared by every
    # instance of this recipe living in the same process.
    visited_urls = {}

    def get_article_url(self, article):
        """Return the article's URL, or None if it should be skipped.

        Delegates URL extraction to BasicNewsRecipe.get_article_url and
        then de-duplicates: the first time a URL is seen it is recorded
        and returned; any later occurrence yields None.

        :param article: feed entry, passed unchanged to the base class.
        :return: the URL on first sight; None for a duplicate or when
            the base class could not determine a URL.
        """
        url = BasicNewsRecipe.get_article_url(self, article)

        # The base implementation may not find a URL. Pass None through
        # untouched: concatenating it into the log messages below would
        # raise TypeError, and it must not be recorded as "visited".
        if url is None:
            return None

        if url in self.visited_urls:
            self.log.debug('Ignoring duplicate: ' + url)
            return None

        self.visited_urls[url] = True
        self.log.debug('Accepting: ' + url)
        return url