__license__ = 'GPL v3'
__copyright__ = '2012'
'''
nrc.nl
'''

from calibre.web.feeds.recipes import BasicNewsRecipe


class NRC(BasicNewsRecipe):
    """Calibre recipe that downloads the nrc.nl category RSS feeds.

    Each article page is reduced to its ``div.article`` container, cut off
    after the element with id ``broodtekst``, and has its plain-text
    hyperlinks flattened to bare text (see ``preprocess_html``).
    """

    title = 'NRC Handelsblad'
    __author__ = 'veezh'
    description = 'Nieuws (no subscription needed)'
    publisher = 'nrc.nl'
    category = 'news, Netherlands, world'
    language = 'nl'
    encoding = 'utf-8'
    timefmt = ''

    # Feed handling.
    oldest_article = 1
    max_articles_per_feed = 100
    use_embedded_content = False
    remove_empty_feeds = True

    # ``filterDuplicates`` is not an option BasicNewsRecipe itself reads; it
    # is kept only so anything that inspects the attribute keeps working.
    filterDuplicates = True
    # Supported way to drop a story that shows up in more than one category
    # feed: match on either the title or the URL.
    ignore_duplicate_articles = {'title', 'url'}

    # Styling: discard the site's stylesheets and apply our own rules.
    # Disabled rules use real CSS comments; a leading '#' is an ID selector
    # in CSS, not a comment marker.
    no_stylesheets = True
    extra_css = '''
        h1{font-size:130%;}
        /* h2{font-size:100%;font-weight:normal;} */
        /* .href{font-size:xx-small;} */
        .bijschrift{color:#666666; font-size:x-small;}
        /* .main-article-info{font-family:Arial,Helvetica,sans-serif;} */
        #full-contents{font-size:small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;}
        #match-stats-summary{font-size:small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;}
    '''

    conversion_options = {
        'comments': description,
        'tags': category,
        'language': language,
        'publisher': publisher,
        'linearize_tables': True,
    }

    # Keep only the article container and drop everything that follows the
    # element with id 'broodtekst' (Dutch for "body text").
    keep_only_tags = [dict(name='div', attrs={'class': 'article'})]
    remove_tags_after = [dict(id='broodtekst')]

    feeds = [
        # General feed, disabled in the original recipe (reason not recorded;
        # presumably it overlaps with the category feeds - confirm):
        # ('Nieuws', 'http://www.nrc.nl/rss.php'),
        ('Binnenland', 'http://www.nrc.nl/nieuws/categorie/binnenland/rss.php'),
        ('Buitenland', 'http://www.nrc.nl/nieuws/categorie/buitenland/rss.php'),
        ('Economie', 'http://www.nrc.nl/nieuws/categorie/economie/rss.php'),
        ('Wetenschap', 'http://www.nrc.nl/nieuws/categorie/wetenschap/rss.php'),
        ('Cultuur', 'http://www.nrc.nl/nieuws/categorie/cultuur/rss.php'),
        ('Boeken', 'http://www.nrc.nl/boeken/rss.php'),
        # NOTE(review): the next two URLs end in a trailing slash, unlike the
        # others; kept exactly as in the original - confirm they are correct.
        ('Tech', 'http://www.nrc.nl/tech/rss.php/'),
        ('Klimaat', 'http://www.nrc.nl/klimaat/rss.php/'),
    ]

    def preprocess_html(self, soup):
        """Replace every plain-text hyperlink in *soup* with its bare text.

        Anchors whose ``.string`` is ``None`` (for example, links wrapping
        other markup) are left untouched.

        :param soup: parsed article document, modified in place.
        :return: the same ``soup`` object.
        """
        for anchor in soup.findAll('a'):
            link_text = anchor.string
            if link_text is None:
                continue
            anchor.replaceWith(link_text)
        return soup