New recipe for Vrij Nederland by kwetal

2026-02-05 18:43:30 -05:00 · 2009-11-30 13:41:39 -07:00 · 2009-11-30 13:41:39 -07:00 · 7ebdad563a
commit 7ebdad563a
parent 9c59b44a7e
2 changed files with 77 additions and 1 deletion
--- a/Changelog.yaml
+++ b/Changelog.yaml
@@ -62,7 +62,7 @@
    - title: The Economist (no subscription required)
      author: Kovid Goyal

-    - title: Sports Illustrated1
+    - title: Sports Illustrated
      author: kwetal

    - title: Levante
--- a/resources/recipes/vrijnederland.recipe
+++ b/resources/recipes/vrijnederland.recipe
@@ -0,0 +1,76 @@
+from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.ebooks.BeautifulSoup import Tag
+
+class VrijNederlandRecipe(BasicNewsRecipe):
+    """Calibre news recipe for the Dutch weekly opinion magazine Vrij Nederland.
+
+    Articles are collected from the RSS feeds listed in ``feeds`` (topic
+    sections plus feeds named after individual authors). Only the main
+    article column of each page is kept, and ``preprocess_html`` tidies the
+    result before conversion.
+    """
+
+    __license__ = 'GPL v3'
+    __author__ = 'kwetal'
+    language = 'nl_NL'
+    locale = 'nl_NL'
+    version = 1
+
+    title = u'Vrij Nederland'
+    publisher = u'Weekbladpers Tijdschriften'
+    category = u'News, Opinion'
+    description = u'Weekly opinion magazine from the Netherlands'
+
+    oldest_article = 7
+    max_articles_per_feed = 100
+    use_embedded_content = False
+
+    no_stylesheets = True
+    remove_javascript = True
+    # NOTE: the stylesheet below was tried but does not seem to have any
+    # effect, so it is left disabled.
+    #extra_css = '''li.calibre2 {padding-bottom: 40px}'''
+
+    # Metadata handed to the conversion pipeline; reuses the values above.
+    conversion_options = {'publisher': publisher, 'tags': category, 'comments': description}
+
+    # (feed title, feed URL) pairs.
+    feeds = [
+        (u'Politiek', u'http://www.vn.nl/politiek.rss'),
+        (u'Buitenland', u'http://www.vn.nl/buitenland.rss'),
+        (u'Economie', u'http://www.vn.nl/economie.rss'),
+        (u'Justitie', u'http://www.vn.nl/justitie.rss'),
+        (u'Samenleving', u'http://www.vn.nl/samenleving.rss'),
+        (u'Crime', u'http://www.vn.nl/crime.rss'),
+        (u'Media', u'http://www.vn.nl/media.rss'),
+        (u'De Republiek der Letteren', u'http://www.vn.nl/republiek.rss'),
+        (u'Max van Weezel', u'http://www.vn.nl/vanweezel.rss'),
+        (u'Ko Colijn', u'http://www.vn.nl/colijn.rss'),
+        (u'Kees Kraaijeveld', u'http://www.vn.nl/kraaijeveld.rss'),
+        (u'Frank Kalshoven', u'http://www.vn.nl/kalshoven.rss'),
+        (u'Stephan Sanders', u'http://www.vn.nl/sanders.rss'),
+        (u'Micha Wertheim', u'http://www.vn.nl/wertheim.rss'),
+        (u'Arnon Grunberg', u'http://www.vn.nl/grunberg.rss'),
+        (u'Carel Peeters', u'http://www.vn.nl/carelpeeters.rss'),
+    ]
+
+    # Keep only the main content column of each article page.
+    keep_only_tags = [dict(name='div', attrs={'class': 'cl-column column-one'})]
+
+    # Elements dropped from the kept content.
+    remove_tags = [
+        dict(name='div', attrs={'class': 'wpg-element guest-book-overview'}),
+        dict(name='div', attrs={'class': 'wpg-element forum-message-form'}),
+        dict(name='div', attrs={'class': 'mediaterms'}),
+        dict(name='div', attrs={'class': 'label-term'}),
+        dict(name='div', attrs={'class': 'wpg-element Media-Collection-Element-Artikel-Lijst'}),
+        dict(name='object'),
+        dict(name='link'),
+        dict(name='meta'),
+    ]
+
+    def preprocess_html(self, soup):
+        """Tidy a parsed article page before conversion.
+
+        Removes the ``link`` span and every ``seperator`` span from the
+        first ``div.meta`` block (if there is one), then replaces the
+        document's ``<head>`` with an empty one.
+
+        :param soup: parsed document (BeautifulSoup tree); modified in place.
+        :return: the same ``soup`` object.
+        """
+        meta = soup.find('div', attrs={'class': 'meta'})
+        if meta is not None:
+            link = meta.find('span', attrs={'class': 'link'})
+            if link is not None:
+                link.extract()
+            # 'seperator' (sic) is the class name being matched; it must
+            # stay misspelled to keep matching the same elements.
+            for separator in meta.findAll('span', attrs={'class': 'seperator'}):
+                separator.extract()
+
+        # Their header is full of 'if IE6/7/8' tags. Just get rid of it
+        # altogether. A page without a <head> yields None here, so guard the
+        # extraction instead of failing the whole article.
+        their_head = soup.head
+        if their_head is not None:
+            their_head.extract()
+        soup.insert(0, Tag(soup, 'head'))
+
+        return soup
+
+
+