__license__ = 'GPL v3'
__copyright__ = '2010, Darko Miletic '
'''
elsevier.nl
'''

from calibre.web.feeds.news import BasicNewsRecipe


class Pagina12(BasicNewsRecipe):
    """Calibre news recipe that downloads articles from elsevier.nl RSS feeds.

    The class attributes below configure the BasicNewsRecipe machinery
    (metadata, feed list, HTML clean-up rules); the methods customise how
    article URLs are derived and how the fetched HTML is post-processed.
    """

    # --- Publication metadata -------------------------------------------
    title = 'Elsevier.nl'
    __author__ = 'Darko Miletic'
    description = 'News from Holland'
    publisher = 'elsevier.nl'
    category = 'news, politics, Holland'
    language = 'nl'
    country = 'NL'
    masthead_url = 'http://www.elsevier.nl/static/elsevier/stdimg/logo.gif'

    # --- Download behaviour ---------------------------------------------
    oldest_article = 2
    max_articles_per_feed = 200
    no_stylesheets = True
    encoding = 'utf-8'
    use_embedded_content = False
    remove_empty_feeds = True

    # Replacement styling applied to the generated book.
    extra_css = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em} '

    # Metadata forwarded to the conversion pipeline; reuses the values above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # --- HTML clean-up rules --------------------------------------------
    keep_only_tags = dict(attrs={'id': 'artikel_container'})
    remove_tags_before = dict(attrs={'id': 'breadcrumb_container'})
    remove_tags_after = dict(attrs={'class': 'author_link'})
    remove_tags = [
        dict(attrs={'id': 'breadcrumb_container'}),
        dict(name='div', attrs={'class': 'pullout_vak'}),
    ]
    remove_attributes = ['width', 'height']

    # --- Feeds: (section title, RSS URL) --------------------------------
    feeds = [
        (u'Laatste nieuws', u'http://www.elsevier.nl/web/RSS/Homepage-RSS.htm?output=xml'),
        (u'Nederland', u'http://www.elsevier.nl/web/RSS/Nederland-RSS.htm?output=xml'),
        (u'Politiek', u'http://www.elsevier.nl/web/RSS/Politiek-RSS.htm?output=xml'),
        (u'Europese Unie', u'http://www.elsevier.nl/web/RSS/Europese-Unie-RSS.htm?output=xml'),
        (u'Buitenland', u'http://www.elsevier.nl/web/RSS/Buitenland-RSS.htm?output=xml'),
        (u'Economie', u'http://www.elsevier.nl/web/RSS/Economie-RSS.htm?output=xml'),
        (u'Wetenschap', u'http://www.elsevier.nl/web/RSS/Wetenschap-RSS.htm?output=xml'),
        (u'Cultuur & Televisie', u'http://www.elsevier.nl/web/RSS/Cultuur-Televisie-RSS.htm?output=xml'),
        (u'Society', u'http://www.elsevier.nl/web/RSS/Society-RSS.htm?output=xml'),
        (u'Internet&/Gadgets', u'http://www.elsevier.nl/web/RSS/Internet-Gadgets-RSS.htm?output=xml'),
        (u'Comentaren', u'http://www.elsevier.nl/web/RSS/Commentaren-RSS.htm?output=xml'),
    ]

    def print_version(self, url):
        """Return the printer-friendly URL for *url*.

        Appends the ``?print=true`` query string; this relies on
        ``get_article_url`` having already stripped any query string.
        """
        return url + '?print=true'

    def get_article_url(self, article):
        """Return the article's guid with its query string removed.

        :param article: feed entry exposing a dict-style ``get`` method.
        :return: the guid up to (not including) its last ``?``; the guid
            unchanged when it contains no ``?``; ``None`` when the entry
            has no guid.
        """
        guid = article.get('guid', None)
        if not guid:
            # No guid on this entry: report "no URL" instead of raising
            # AttributeError on None.
            return None
        base, separator, _query = guid.rpartition('?')
        # rpartition yields ('', '', guid) when there is no '?', so the
        # first element alone would silently discard the whole URL.
        return base if separator else guid

    def preprocess_html(self, soup):
        """Strip every inline ``style`` attribute from *soup* and return it."""
        for item in soup.findAll(style=True):
            del item['style']
        return soup