diff --git a/Changelog.yaml b/Changelog.yaml index 5ff3b1969d..1baa99006b 100644 --- a/Changelog.yaml +++ b/Changelog.yaml @@ -62,7 +62,7 @@ - title: The Economist (no subscription required) author: Kovid Goyal - - title: Sports Illustrated1 + - title: Sports Illustrated author: kwetal - title: Levante diff --git a/resources/recipes/vrijnederland.recipe b/resources/recipes/vrijnederland.recipe new file mode 100644 index 0000000000..7bae6e2ee0 --- /dev/null +++ b/resources/recipes/vrijnederland.recipe @@ -0,0 +1,76 @@ +from calibre.web.feeds.news import BasicNewsRecipe +from calibre.ebooks.BeautifulSoup import Tag + +class VrijNederlandRecipe(BasicNewsRecipe) : + __license__ = 'GPL v3' + __author__ = 'kwetal' + language = 'nl_NL' + locale = 'nl_NL' + version = 1 + + title = u'Vrij Nederland' + publisher = u'Weekbladpers Tijdschriften' + category = u'News, Opinion' + description = u'Weekly opinion magazine from the Netherlands' + + oldest_article = 7 + max_articles_per_feed = 100 + use_embedded_content = False + + no_stylesheets = True + remove_javascript = True + # Does not seem to work + #extra_css = '''li.calibre2 {padding-bottom: 40px}''' + + conversion_options = {'publisher': publisher, 'tags': category, 'comments': description} + + feeds = [] + feeds.append((u'Politiek', u'http://www.vn.nl/politiek.rss')) + feeds.append((u'Buitenland', u'http://www.vn.nl/buitenland.rss')) + feeds.append((u'Economie', u'http://www.vn.nl/economie.rss')) + feeds.append((u'Justitie', u'http://www.vn.nl/justitie.rss')) + feeds.append((u'Samenleving', u'http://www.vn.nl/samenleving.rss')) + feeds.append((u'Crime', u'http://www.vn.nl/crime.rss')) + feeds.append((u'Media', u'http://www.vn.nl/media.rss')) + feeds.append((u'De Republiek der Letteren', u'http://www.vn.nl/republiek.rss')) + feeds.append((u'Max van Weezel', u'http://www.vn.nl/vanweezel.rss')) + feeds.append((u'Ko Colijn', u'http://www.vn.nl/colijn.rss')) + feeds.append((u'Kees Kraaijeveld', u'http://www.vn.nl/kraaijeveld.rss')) + feeds.append((u'Frank Kalshoven', u'http://www.vn.nl/kalshoven.rss')) + feeds.append((u'Stephan Sanders', u'http://www.vn.nl/sanders.rss')) + feeds.append((u'Micha Wertheim', u'http://www.vn.nl/wertheim.rss')) + feeds.append((u'Arnon Grunberg', u'http://www.vn.nl/grunberg.rss')) + feeds.append((u'Carel Peeters', u'http://www.vn.nl/carelpeeters.rss')) + + keep_only_tags = [dict(name = 'div', attrs = {'class' : 'cl-column column-one'})] + + remove_tags = [] + remove_tags.append(dict(name = 'div', attrs = {'class' : 'wpg-element guest-book-overview'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'wpg-element forum-message-form'})) + remove_tags.append(dict(name = 'div', attrs = {'class' : 'mediaterms'})) + remove_tags.append(dict(name = 'div', attrs = {'class': 'label-term'})) + remove_tags.append(dict(name = 'div', attrs = {'class': 'wpg-element Media-Collection-Element-Artikel-Lijst'})) + remove_tags.append(dict(name = 'object')) + remove_tags.append(dict(name = 'link')) + remove_tags.append(dict(name = 'meta')) + + def preprocess_html(self, soup): + # Just clean up the result a little + meta = soup.find('div', attrs = {'class': 'meta'}) + if meta: + link = meta.find('span', attrs = {'class': 'link'}) + if link: + link.extract() + for seperator in meta.findAll('span', attrs = {'class': 'seperator'}): + seperator.extract() + + # Their header is full of 'if IE6/7/8' tags. Just get rid of it altogether + theirHead = soup.head + theirHead.extract() + myHead = Tag(soup, 'head') + soup.insert(0, myHead) + + return soup + + +