mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
59 lines
2.2 KiB
Plaintext
59 lines
2.2 KiB
Plaintext
from calibre.web.feeds.recipes import BasicNewsRecipe
|
|
from calibre.ebooks.BeautifulSoup import BeautifulSoup, BeautifulStoneSoup
|
|
|
|
class Ekathimerini(BasicNewsRecipe):
    """Recipe for ekathimerini, the English edition of Kathimerini.

    All articles are read from a single XML file (``rss_url``). Items are
    grouped into feeds by the text of their ``<subcat>`` element; items with
    no ``<subcat>`` (or an empty one) are collected in a generic "News" feed.
    """

    title = 'ekathimerini'
    __author__ = 'Thomas Scholl'
    description = 'News from Greece, English edition'
    masthead_url = 'http://wwk.kathimerini.gr/webadmin/EnglishNew/gifs/logo.gif'
    max_articles_per_feed = 100
    oldest_article = 100
    publisher = 'Kathimerini'
    category = 'news, GR'
    language = 'en_GR'
    # windows-1253 is the Greek Windows code page.
    encoding = 'windows-1253'
    conversion_options = {'linearize_tables': True}
    no_stylesheets = True
    delay = 1
    # Only the table cell carrying the "news" class is kept from each page.
    keep_only_tags = [dict(name='td', attrs={'class': 'news'})]

    # Single XML file from which every feed is built (see parse_index).
    rss_url = 'http://ws.kathimerini.gr/xml_files/latestnews.xml'

    def find_articles(self, idx, category):
        """Yield one article dict per ``<item>`` belonging to ``category``.

        :param idx: parsed XML index, searched with ``findAll('item')``.
        :param category: sub-category text to match. ``u''`` selects the
            items that have no ``<subcat>`` element or an empty one.
        :return: generator of dicts with the keys ``title``, ``url``,
            ``description`` and ``date``.
        """
        for item in idx.findAll('item'):
            subcat = item.find('subcat')
            # Explicit None check: a missing <subcat> means "uncategorised".
            if subcat is None:
                item_category = u''
            else:
                item_category = self.tag_to_string(subcat)

            if item_category != category:
                continue

            # The description text is itself parsed as HTML and flattened
            # to plain text.
            desc_html = self.tag_to_string(item.find('description'))
            summary = self.tag_to_string(BeautifulSoup(desc_html))

            yield {
                'title': self.tag_to_string(item.find('title')),
                'url': self.tag_to_string(item.find('link')),
                'description': summary,
                'date': self.tag_to_string(item.find('pubdate')),
            }

    def parse_index(self):
        """Download the XML index and split it into per-category feeds.

        :return: list of ``(feed_title, articles)`` tuples. The first entry
            is the "News" feed of uncategorised items, followed by one feed
            per distinct sub-category in sorted order.
        """
        raw_index = self.browser.open(self.rss_url).read()
        idx = BeautifulStoneSoup(
            raw_index, convertEntities=BeautifulStoneSoup.XML_ENTITIES)

        names = set(self.tag_to_string(tag) for tag in idx.findAll('subcat'))
        # Items with an empty <subcat> already land in the "News" feed below;
        # keeping u'' here would add a second, blank-titled copy of that feed.
        names.discard(u'')

        feeds = [(u'News', list(self.find_articles(idx, u'')))]
        for name in sorted(names):
            feeds.append(
                (name.capitalize(), list(self.find_articles(idx, name))))
        return feeds

    def print_version(self, url):
        """Return ``url`` with the ``4dcgi/`` prefix rewritten to ``4Dcgi/4dcgi/``.

        URLs that do not contain the expected prefix are returned unchanged.
        """
        return url.replace(
            'http://www.ekathimerini.com/4dcgi/',
            'http://www.ekathimerini.com/4Dcgi/4dcgi/')
|
|
|