calibre/recipes/ekathemerini.recipe
Kovid Goyal 567040ee1e Perform PEP8 compliance checks on the entire codebase
Some bits of PEP 8 are turned off via setup.cfg
2016-07-29 21:25:17 +05:30

61 lines
2.1 KiB
Plaintext

from calibre.web.feeds.recipes import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup
class Ekathimerini(BasicNewsRecipe):
title = 'ekathimerini'
__author__ = 'Thomas Scholl'
description = 'News from Greece, English edition'
masthead_url = 'http://wwk.kathimerini.gr/webadmin/EnglishNew/gifs/logo.gif'
max_articles_per_feed = 100
oldest_article = 100
publisher = 'Kathimerini'
category = 'news, GR'
language = 'en_GR'
encoding = 'windows-1253'
conversion_options = {'linearize_tables': True}
no_stylesheets = True
delay = 1
keep_only_tags = [dict(name='td', attrs={'class': 'news'})]
rss_url = 'http://ws.kathimerini.gr/xml_files/latestnews.xml'
def find_articles(self, idx, category):
for article in idx.findAll('item'):
cat = u''
cat_elem = article.find('subcat')
if cat_elem:
cat = self.tag_to_string(cat_elem)
if cat == category:
desc_html = self.tag_to_string(article.find('description'))
description = self.tag_to_string(BeautifulSoup(desc_html))
a = {
'title': self.tag_to_string(article.find('title')),
'url': self.tag_to_string(article.find('link')),
'description': description,
'date': self.tag_to_string(article.find('pubdate')),
}
yield a
def parse_index(self):
idx_contents = self.browser.open(self.rss_url).read()
idx = BeautifulStoneSoup(
idx_contents, convertEntities=BeautifulStoneSoup.XML_ENTITIES)
cats = list(set([self.tag_to_string(subcat)
for subcat in idx.findAll('subcat')]))
cats.sort()
feeds = [(u'News', list(self.find_articles(idx, u'')))]
for cat in cats:
feeds.append((cat.capitalize(), list(
self.find_articles(idx, cat))))
return feeds
def print_version(self, url):
return url.replace('http://www.ekathimerini.com/4dcgi/', 'http://www.ekathimerini.com/4Dcgi/4dcgi/')