mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
New recipe for Vrij Nederland by kwetal
This commit is contained in:
parent
9c59b44a7e
commit
7ebdad563a
@ -62,7 +62,7 @@
|
|||||||
- title: The Economist (no subscription required)
|
- title: The Economist (no subscription required)
|
||||||
author: Kovid Goyal
|
author: Kovid Goyal
|
||||||
|
|
||||||
- title: Sports Illustrated1
|
- title: Sports Illustrated
|
||||||
author: kwetal
|
author: kwetal
|
||||||
|
|
||||||
- title: Levante
|
- title: Levante
|
||||||
|
76
resources/recipes/vrijnederland.recipe
Normal file
76
resources/recipes/vrijnederland.recipe
Normal file
@ -0,0 +1,76 @@
|
|||||||
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
from calibre.ebooks.BeautifulSoup import Tag
|
||||||
|
|
||||||
|
class VrijNederlandRecipe(BasicNewsRecipe):
    """Calibre news recipe for the Dutch weekly opinion magazine Vrij Nederland.

    Articles are collected from the per-section and per-columnist RSS feeds
    listed in ``feeds``. Each downloaded page is reduced to its main column
    (``keep_only_tags``), stripped of comment forms, media widgets and
    similar clutter (``remove_tags``), and finally tidied up in
    ``preprocess_html``.
    """

    __license__ = 'GPL v3'
    __author__ = 'kwetal'
    language = 'nl_NL'
    locale = 'nl_NL'
    version = 1

    title = u'Vrij Nederland'
    publisher = u'Weekbladpers Tijdschriften'
    category = u'News, Opinion'
    description = u'Weekly opinion magazine from the Netherlands'

    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False

    no_stylesheets = True
    remove_javascript = True
    # NOTE: the original author tried the following extra_css and reported
    # that it does not seem to work, so it is left disabled:
    #   extra_css = '''li.calibre2 {padding-bottom: 40px}'''

    # Reuse the metadata declared above for the generated e-book.
    conversion_options = {'publisher': publisher, 'tags': category, 'comments': description}

    # (feed title, feed URL) pairs: sections first, then individual columnists.
    feeds = [
        (u'Politiek', u'http://www.vn.nl/politiek.rss'),
        (u'Buitenland', u'http://www.vn.nl/buitenland.rss'),
        (u'Economie', u'http://www.vn.nl/economie.rss'),
        (u'Justitie', u'http://www.vn.nl/justitie.rss'),
        (u'Samenleving', u'http://www.vn.nl/samenleving.rss'),
        (u'Crime', u'http://www.vn.nl/crime.rss'),
        (u'Media', u'http://www.vn.nl/media.rss'),
        (u'De Republiek der Letteren', u'http://www.vn.nl/republiek.rss'),
        (u'Max van Weezel', u'http://www.vn.nl/vanweezel.rss'),
        (u'Ko Colijn', u'http://www.vn.nl/colijn.rss'),
        (u'Kees Kraaijeveld', u'http://www.vn.nl/kraaijeveld.rss'),
        (u'Frank Kalshoven', u'http://www.vn.nl/kalshoven.rss'),
        (u'Stephan Sanders', u'http://www.vn.nl/sanders.rss'),
        (u'Micha Wertheim', u'http://www.vn.nl/wertheim.rss'),
        (u'Arnon Grunberg', u'http://www.vn.nl/grunberg.rss'),
        (u'Carel Peeters', u'http://www.vn.nl/carelpeeters.rss'),
    ]

    # Only the first column of the page layout is kept.
    keep_only_tags = [dict(name='div', attrs={'class': 'cl-column column-one'})]

    # Elements dropped from the kept column, matched by tag name and class.
    remove_tags = [
        dict(name='div', attrs={'class': 'wpg-element guest-book-overview'}),
        dict(name='div', attrs={'class': 'wpg-element forum-message-form'}),
        dict(name='div', attrs={'class': 'mediaterms'}),
        dict(name='div', attrs={'class': 'label-term'}),
        dict(name='div', attrs={'class': 'wpg-element Media-Collection-Element-Artikel-Lijst'}),
        dict(name='object'),
        dict(name='link'),
        dict(name='meta'),
    ]

    def preprocess_html(self, soup):
        """Tidy the parsed article page and return it.

        Removes the ``span.link`` element and every ``span.seperator``
        element found inside the first ``div.meta``, then replaces the
        page's ``<head>`` with an empty one.

        :param soup: parsed article page (a BeautifulSoup tree).
        :return: the same ``soup`` object, modified in place.
        """
        # Just clean up the result a little
        meta = soup.find('div', attrs={'class': 'meta'})
        if meta is not None:
            link = meta.find('span', attrs={'class': 'link'})
            if link is not None:
                link.extract()
            # 'seperator' (sic) is the class name used in the site's markup.
            for separator in meta.findAll('span', attrs={'class': 'seperator'}):
                separator.extract()

        # Their header is full of 'if IE6/7/8' tags. Just get rid of it
        # altogether. A page without a <head> must not crash the download,
        # so only extract the old head when there is one.
        their_head = soup.head
        if their_head is not None:
            their_head.extract()
        soup.insert(0, Tag(soup, 'head'))

        return soup
|
||||||
|
|
||||||
|
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user