diff --git a/recipes/kathemerini.recipe b/recipes/kathemerini.recipe index e6fb252a85..9dde5aca63 100644 --- a/recipes/kathemerini.recipe +++ b/recipes/kathemerini.recipe @@ -2,37 +2,25 @@ from calibre.web.feeds.recipes import BasicNewsRecipe class Kathimerini(BasicNewsRecipe): title = 'Kathimerini' - __author__ = 'Pan' + __author__ = 'jenniepet' description = 'News from Greece' max_articles_per_feed = 100 - oldest_article = 100 + oldest_article = 1 publisher = 'Kathimerini' category = 'news, GR' language = 'el' - encoding = 'windows-1253' - conversion_options = { 'linearize_tables': True} + encoding = 'utf-8' + conversion_options = {'linearize_tables': True} no_stylesheets = True - remove_tags_before = dict(name='td',attrs={'class':'news'}) - remove_tags_after = dict(name='td',attrs={'class':'news'}) - remove_attributes = ['width', 'src','header','footer'] - - feeds = [(u'\u03a0\u03bf\u03bb\u03b9\u03c4\u03b9\u03ba\u03ae', - 'http://wk.kathimerini.gr/xml_files/politics.xml'), - (u'\u0395\u03bb\u03bb\u03ac\u03b4\u03b1', - ' http://wk.kathimerini.gr/xml_files/ell.xml'), - (u'\u039a\u03cc\u03c3\u03bc\u03bf\u03c2', - ' http://wk.kathimerini.gr/xml_files/world.xml'), - (u'\u039f\u03b9\u03ba\u03bf\u03bd\u03bf\u03bc\u03af\u03b1', - 'http://wk.kathimerini.gr/xml_files/economy_1.xml'), - (u'\u0395\u03c0\u03b9\u03c7\u03b5\u03b9\u03c1\u03ae\u03c3\u03b5\u03b9\u03c2', - 'http://wk.kathimerini.gr/xml_files/economy_2.xml'), - (u'\u0394\u03b9\u03b5\u03b8\u03bd\u03ae\u03c2 \u039f\u03b9\u03ba\u03bf\u03bd\u03bf\u03bc\u03af\u03b1', - 'http://wk.kathimerini.gr/xml_files/economy_3.xml'), - (u'\u03a0\u03bf\u03bb\u03b9\u03c4\u03b9\u03c3\u03bc\u03cc\u03c2', - 'http://wk.kathimerini.gr/xml_files/civ.xml'), - (u'\u039c\u03cc\u03bd\u03b9\u03bc\u03b5\u03c2 \u03a3\u03c4\u03ae\u03bb\u03b5\u03c2', - 'http://wk.kathimerini.gr/xml_files/st.xml')] - - def print_version(self, url): - return url.replace('http://news.kathimerini.gr/4dcgi/', 'http://news.kathimerini.gr/4dcgi/4dcgi/') + remove_tags_before = dict(id='site-body') + remove_tags_after = [dict(id='social')] + remove_tags = [dict(attrs={'class':['post-tools', 'edition edition_PRINT']})] + feeds = [(u'1','http://www.kathimerini.gr/rss?i=news.el.search&q=&t=0&w=&c=&s=p&type=&edition=PRINT&author=0&fromDate=0&toDate=0'), +(u'2','http://www.kathimerini.gr/rss?i=news.el.search&q=&t=0&w=&c=&s=p&type=&edition=PRINT&author=0&fromDate=0&toDate=0&page=1'), +(u'3','http://www.kathimerini.gr/rss?i=news.el.search&q=&t=0&w=&c=&s=p&type=&edition=PRINT&author=0&fromDate=0&toDate=0&page=2'), +(u'4','http://www.kathimerini.gr/rss?i=news.el.search&q=&t=0&w=&c=&s=p&type=&edition=PRINT&author=0&fromDate=0&toDate=0&page=3'), +(u'5','http://www.kathimerini.gr/rss?i=news.el.search&q=&t=0&w=&c=&s=p&type=&edition=PRINT&author=0&fromDate=0&toDate=0&page=4')] + def get_cover_url(self): + import time + return 'http://s.kathimerini.gr/resources/issue-cover/%s.jpg' %time.strftime('%d-%m-%Y')