mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-11-03 19:17:02 -05:00 
			
		
		
		
	New recipe for Vrij Nederland by kwetal
This commit is contained in:
		
							parent
							
								
									9c59b44a7e
								
							
						
					
					
						commit
						7ebdad563a
					
				@ -62,7 +62,7 @@
 | 
				
			|||||||
    - title: The Economist (no subscription required)
 | 
					    - title: The Economist (no subscription required)
 | 
				
			||||||
      author: Kovid Goyal
 | 
					      author: Kovid Goyal
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    - title: Sports Illustrated1
 | 
					    - title: Sports Illustrated
 | 
				
			||||||
      author: kwetal
 | 
					      author: kwetal
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    - title: Levante
 | 
					    - title: Levante
 | 
				
			||||||
 | 
				
			|||||||
							
								
								
									
										76
									
								
								resources/recipes/vrijnederland.recipe
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										76
									
								
								resources/recipes/vrijnederland.recipe
									
									
									
									
									
										Normal file
									
								
							@ -0,0 +1,76 @@
 | 
				
			|||||||
 | 
					from calibre.web.feeds.news import BasicNewsRecipe
 | 
				
			||||||
 | 
					from calibre.ebooks.BeautifulSoup import Tag
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					class VrijNederlandRecipe(BasicNewsRecipe):
					    """News recipe for the Dutch weekly opinion magazine Vrij Nederland.

					    Articles are collected from the section and columnist RSS feeds on
					    www.vn.nl; each article page is reduced to its main column and
					    stripped of comment forms, media blocks and the original <head>.
					    """

					    __license__ = 'GPL v3'
					    __author__ = 'kwetal'
					    language = 'nl_NL'
					    locale = 'nl_NL'
					    version = 1

					    title = u'Vrij Nederland'
					    publisher = u'Weekbladpers Tijdschriften'
					    category = u'News, Opinion'
					    description = u'Weekly opinion magazine from the Netherlands'

					    oldest_article = 7
					    max_articles_per_feed = 100
					    use_embedded_content = False

					    no_stylesheets = True
					    remove_javascript = True
					    # Does not seem to work
					    #extra_css = '''li.calibre2 {padding-bottom: 40px}'''

					    # Reuses the class-level metadata defined above.
					    conversion_options = {
					        'publisher': publisher,
					        'tags': category,
					        'comments': description,
					    }

					    # (feed title, feed URL) pairs: sections first, then columnists.
					    feeds = [
					        (u'Politiek', u'http://www.vn.nl/politiek.rss'),
					        (u'Buitenland', u'http://www.vn.nl/buitenland.rss'),
					        (u'Economie', u'http://www.vn.nl/economie.rss'),
					        (u'Justitie', u'http://www.vn.nl/justitie.rss'),
					        (u'Samenleving', u'http://www.vn.nl/samenleving.rss'),
					        (u'Crime', u'http://www.vn.nl/crime.rss'),
					        (u'Media', u'http://www.vn.nl/media.rss'),
					        (u'De Republiek der Letteren', u'http://www.vn.nl/republiek.rss'),
					        (u'Max van Weezel', u'http://www.vn.nl/vanweezel.rss'),
					        (u'Ko Colijn', u'http://www.vn.nl/colijn.rss'),
					        (u'Kees Kraaijeveld', u'http://www.vn.nl/kraaijeveld.rss'),
					        (u'Frank Kalshoven', u'http://www.vn.nl/kalshoven.rss'),
					        (u'Stephan Sanders', u'http://www.vn.nl/sanders.rss'),
					        (u'Micha Wertheim', u'http://www.vn.nl/wertheim.rss'),
					        (u'Arnon Grunberg', u'http://www.vn.nl/grunberg.rss'),
					        (u'Carel Peeters', u'http://www.vn.nl/carelpeeters.rss'),
					    ]

					    # Only the main article column is kept.
					    keep_only_tags = [
					        dict(name='div', attrs={'class': 'cl-column column-one'}),
					    ]

					    # Elements dropped from the kept column.
					    remove_tags = [
					        dict(name='div', attrs={'class': 'wpg-element guest-book-overview'}),
					        dict(name='div', attrs={'class': 'wpg-element forum-message-form'}),
					        dict(name='div', attrs={'class': 'mediaterms'}),
					        dict(name='div', attrs={'class': 'label-term'}),
					        dict(name='div', attrs={'class': 'wpg-element Media-Collection-Element-Artikel-Lijst'}),
					        dict(name='object'),
					        dict(name='link'),
					        dict(name='meta'),
					    ]

					    def preprocess_html(self, soup):
					        """Clean up the article's meta block and replace the page <head>.

					        Removes the 'link' span and every 'seperator' span from the
					        first div with class 'meta' (if there is one), detaches the
					        document's <head> when present, and inserts a fresh empty
					        <head> as the first child of the soup.

					        soup -- the parsed article document; it is modified in place.

					        Returns the same soup object.
					        """
					        # Just clean up the result a little
					        meta = soup.find('div', attrs={'class': 'meta'})
					        if meta:
					            link = meta.find('span', attrs={'class': 'link'})
					            if link:
					                link.extract()
					            # NOTE(review): 'seperator' is presumably the spelling used
					            # in the site's markup; the literal must not be "corrected".
					            for seperator in meta.findAll('span', attrs={'class': 'seperator'}):
					                seperator.extract()

					        # Their header is full of 'if IE6/7/8' tags. Just get rid of it
					        # altogether. A page without a <head> must not crash the
					        # download, so only extract when one exists. The test is
					        # 'is not None' on purpose: an empty tag is falsy in
					        # BeautifulSoup 3 and would otherwise be left in place.
					        theirHead = soup.head
					        if theirHead is not None:
					            theirHead.extract()
					        myHead = Tag(soup, 'head')
					        soup.insert(0, myHead)

					        return soup
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					
 | 
				
			||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user