__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic '
'''
europasur.es
'''

from calibre.web.feeds.news import BasicNewsRecipe


class Europasur(BasicNewsRecipe):
    """Calibre news recipe for the Europa Sur newspaper (europasur.es)."""

    # Publication metadata.
    title = 'Europa Sur'
    __author__ = 'Darko Miletic'
    description = 'News in Spanish'
    publisher = 'Joly Digital'
    category = 'news, politics, Spanish'
    language = 'es'
    publication_type = 'newspaper'

    # Fetch settings.
    oldest_article = 2
    max_articles_per_feed = 100
    use_embedded_content = False
    remove_empty_feeds = True
    delay = 2
    no_stylesheets = True
    encoding = 'cp1252'

    # Built from adjacent literals so the resulting stylesheet string stays
    # on one logical line while the source remains readable.
    extra_css = (
        ' body{font-family: Verdana,Arial,Helvetica,sans-serif}'
        ' h2{font-family: Georgia,Times New Roman,Times,serif}'
        ' .subtitle{font-weight:bold}'
        ' .caption{font-size: small}'
        ' .body{font-size: 1.1em}'
        ' .info{color: #848484} '
    )

    # Conversion metadata reuses the class-level values defined above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # Page elements to keep / strip.
    keep_only_tags = [
        dict(attrs={'class': ['titles', 'current']}),
        dict(attrs={'id': 'newsBody'}),
    ]
    remove_tags = [
        dict(name=['iframe', 'base', 'embed', 'object']),
        dict(name='a', attrs={'class': 'zoom thickbox'}),
        dict(name='div', attrs={'class': 'other'}),
    ]
    remove_attributes = ['width', 'height']

    # (section title, RSS feed URL) pairs.
    feeds = [
        (u'Portada', u'http://www.europasur.es/rss/articles.php'),
        (u'Deportes', u'http://www.europasur.es/rss/articles.php?sec=1224'),
        (u'Economia', u'http://www.europasur.es/rss/articles.php?sec=427'),
        (u'Espana', u'http://www.europasur.es/rss/articles.php?sec=437'),
        (u'Mundo', u'http://www.europasur.es/rss/articles.php?sec=428'),
        (u'Pasarela', u'http://www.europasur.es/rss/articles.php?sec=1958'),
        (u'Ocio y cultura', u'http://www.europasur.es/rss/articles.php?sec=1210'),
        (u'Opinion', u'http://www.europasur.es/rss/articles.php?sec=1195'),
        (u'Tecnologia', u'http://www.europasur.es/rss/articles.php?sec=1681'),
        (u'Salud', u'http://www.europasur.es/rss/articles.php?sec=2379'),
    ]

    def image_url_processor(self, baseurl, url):
        """Rewrite an image URL to its media.grupojoly.com cache address.

        URLs shaped like ``<path>/<id>.<ext>&an=<width>`` are rewritten to
        ``http://media.grupojoly.com/cache/<id>_<width>x<width>_<ext>000.<ext>``.

        Args:
            baseurl: URL of the page the image was found on (unused).
            url: Image URL as it appears in the page.

        Returns:
            The rewritten URL, or ``url`` unchanged when it lacks the
            ``&an=`` suffix or has no ``.`` before that suffix.
        """
        image_path, width_sep, width = url.rpartition('&an=')
        stem, ext_sep, ext = image_path.rpartition('.')
        if not width_sep or not ext_sep:
            # rpartition puts the whole input in the last slot when the
            # separator is missing; building a cache URL from that would
            # yield a bogus address, so leave such URLs untouched.
            return url
        # Only the final path component identifies the image.
        article_id = stem.rpartition('/')[2]
        return (
            'http://media.grupojoly.com/cache/' + article_id + '_' + width
            + 'x' + width + '_' + ext + '000.' + ext
        )