From 118632d6302de3aa0c0e430d4ba35afb62e00f5c Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Thu, 29 Mar 2012 14:09:03 +0530
Subject: [PATCH] NRC Handelsblad (free) by veezh

---
 recipes/nrc_handelsblad.recipe | 76 ++++++++++++++++++++++++++++++++++
 1 file changed, 76 insertions(+)
 create mode 100644 recipes/nrc_handelsblad.recipe

diff --git a/recipes/nrc_handelsblad.recipe b/recipes/nrc_handelsblad.recipe
new file mode 100644
index 0000000000..9675b191cc
--- /dev/null
+++ b/recipes/nrc_handelsblad.recipe
@@ -0,0 +1,76 @@
+__license__   = 'GPL v3'
+__copyright__ = '2012'
+'''
+nrc.nl
+'''
+from calibre.web.feeds.recipes import BasicNewsRecipe
+
+class NRC(BasicNewsRecipe):
+    title                  = 'NRC Handelsblad'
+    __author__             = 'veezh'
+    description            = 'Nieuws'
+    oldest_article         = 1
+    max_articles_per_feed  = 100
+    no_stylesheets         = True
+    #delay                  = 1
+    use_embedded_content   = False
+    encoding               = 'utf-8'
+    publisher              = 'nrc.nl'
+    category               = 'news, Netherlands, world'
+    language               = 'nl'
+    timefmt = ''
+    #publication_type       = 'newsportal'
+    extra_css = '''
+                    h1{font-size:130%;}
+                    #h2{font-size:100%;font-weight:normal;}
+                    #.href{font-size:xx-small;}
+                    .bijschrift{color:#666666; font-size:x-small;}
+                    #.main-article-info{font-family:Arial,Helvetica,sans-serif;}
+                    #full-contents{font-size:small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;}
+                    #match-stats-summary{font-size:small; font-family:Arial,Helvetica,sans-serif;font-weight:normal;}
+                '''
+    #preprocess_regexps     = [(re.compile(r'<!--.*?-->', re.DOTALL), lambda m: '')]
+    conversion_options = {
+                              'comments'        : description
+                             ,'tags'            : category
+                             ,'language'        : language
+                             ,'publisher'       : publisher
+                             ,'linearize_tables': True
+                          }
+
+    remove_empty_feeds = True
+
+    filterDuplicates = True
+
+    def preprocess_html(self, soup):
+        for alink in soup.findAll('a'):
+            if alink.string is not None:
+               tstr = alink.string
+               alink.replaceWith(tstr)
+        return soup
+
+    keep_only_tags = [dict(name='div', attrs={'class':'article'})]
+    remove_tags_after = [dict(id='broodtekst')]
+
+#    keep_only_tags    = [
+#                       dict(name='div', attrs={'class':['label']})
+#                        ]
+
+#    remove_tags_after = [dict(name='dl', attrs={'class':['tags']})]
+
+#    def get_article_url(self, article):
+#        link = article.get('link')
+#        if 'blog' not in link and ('chat' not in link):
+#             return link
+
+    feeds          = [
+#                      ('Nieuws', 'http://www.nrc.nl/rss.php'),
+                      ('Binnenland', 'http://www.nrc.nl/nieuws/categorie/binnenland/rss.php'),
+                      ('Buitenland', 'http://www.nrc.nl/nieuws/categorie/buitenland/rss.php'),
+                      ('Economie', 'http://www.nrc.nl/nieuws/categorie/economie/rss.php'),
+                      ('Wetenschap', 'http://www.nrc.nl/nieuws/categorie/wetenschap/rss.php'),
+                      ('Cultuur', 'http://www.nrc.nl/nieuws/categorie/cultuur/rss.php'),
+                      ('Boeken', 'http://www.nrc.nl/boeken/rss.php'),
+                      ('Tech', 'http://www.nrc.nl/tech/rss.php/'),
+                      ('Klimaat', 'http://www.nrc.nl/klimaat/rss.php/'),
+                      ]