mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-10-31 10:37:00 -04:00 
			
		
		
		
	New recipe for Vrij Nederland by kwetal
This commit is contained in:
		
							parent
							
								
									9c59b44a7e
								
							
						
					
					
						commit
						7ebdad563a
					
				| @ -62,7 +62,7 @@ | |||||||
|     - title: The Economist (no subscription required) |     - title: The Economist (no subscription required) | ||||||
|       author: Kovid Goyal |       author: Kovid Goyal | ||||||
| 
 | 
 | ||||||
|     - title: Sports Illustrated1 |     - title: Sports Illustrated | ||||||
|       author: kwetal |       author: kwetal | ||||||
| 
 | 
 | ||||||
|     - title: Levante |     - title: Levante | ||||||
|  | |||||||
							
								
								
									
										76
									
								
								resources/recipes/vrijnederland.recipe
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										76
									
								
								resources/recipes/vrijnederland.recipe
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,76 @@ | |||||||
|  | from calibre.web.feeds.news import BasicNewsRecipe | ||||||
|  | from calibre.ebooks.BeautifulSoup import Tag | ||||||
|  | 
 | ||||||
|  | class VrijNederlandRecipe(BasicNewsRecipe) : | ||||||
|  |     __license__   = 'GPL v3' | ||||||
|  |     __author__ = 'kwetal' | ||||||
|  |     language = 'nl_NL' | ||||||
|  |     locale = 'nl_NL' | ||||||
|  |     version = 1 | ||||||
|  | 
 | ||||||
|  |     title = u'Vrij Nederland' | ||||||
|  |     publisher = u'Weekbladpers Tijdschriften' | ||||||
|  |     category = u'News, Opinion' | ||||||
|  |     description = u'Weekly opinion magazine from the Netherlands' | ||||||
|  | 
 | ||||||
|  |     oldest_article = 7 | ||||||
|  |     max_articles_per_feed = 100 | ||||||
|  |     use_embedded_content = False | ||||||
|  | 
 | ||||||
|  |     no_stylesheets = True | ||||||
|  |     remove_javascript = True | ||||||
|  |     # Does not seem to work | ||||||
|  |     #extra_css = '''li.calibre2 {padding-bottom: 40px}''' | ||||||
|  | 
 | ||||||
|  |     conversion_options = {'publisher': publisher, 'tags': category, 'comments': description} | ||||||
|  | 
 | ||||||
|  |     feeds = [] | ||||||
|  |     feeds.append((u'Politiek', u'http://www.vn.nl/politiek.rss')) | ||||||
|  |     feeds.append((u'Buitenland', u'http://www.vn.nl/buitenland.rss')) | ||||||
|  |     feeds.append((u'Economie', u'http://www.vn.nl/economie.rss')) | ||||||
|  |     feeds.append((u'Justitie', u'http://www.vn.nl/justitie.rss')) | ||||||
|  |     feeds.append((u'Samenleving', u'http://www.vn.nl/samenleving.rss')) | ||||||
|  |     feeds.append((u'Crime', u'http://www.vn.nl/crime.rss')) | ||||||
|  |     feeds.append((u'Media', u'http://www.vn.nl/media.rss')) | ||||||
|  |     feeds.append((u'De Republiek der Letteren', u'http://www.vn.nl/republiek.rss')) | ||||||
|  |     feeds.append((u'Max van Weezel', u'http://www.vn.nl/vanweezel.rss')) | ||||||
|  |     feeds.append((u'Ko Colijn', u'http://www.vn.nl/colijn.rss')) | ||||||
|  |     feeds.append((u'Kees Kraaijeveld', u'http://www.vn.nl/kraaijeveld.rss')) | ||||||
|  |     feeds.append((u'Frank Kalshoven', u'http://www.vn.nl/kalshoven.rss')) | ||||||
|  |     feeds.append((u'Stephan Sanders', u'http://www.vn.nl/sanders.rss')) | ||||||
|  |     feeds.append((u'Micha Wertheim', u'http://www.vn.nl/wertheim.rss')) | ||||||
|  |     feeds.append((u'Arnon Grunberg', u'http://www.vn.nl/grunberg.rss')) | ||||||
|  |     feeds.append((u'Carel Peeters', u'http://www.vn.nl/carelpeeters.rss')) | ||||||
|  | 
 | ||||||
|  |     keep_only_tags = [dict(name = 'div', attrs = {'class' : 'cl-column column-one'})] | ||||||
|  | 
 | ||||||
|  |     remove_tags = [] | ||||||
|  |     remove_tags.append(dict(name = 'div', attrs = {'class' : 'wpg-element guest-book-overview'})) | ||||||
|  |     remove_tags.append(dict(name = 'div', attrs = {'class' : 'wpg-element forum-message-form'})) | ||||||
|  |     remove_tags.append(dict(name = 'div', attrs = {'class' : 'mediaterms'})) | ||||||
|  |     remove_tags.append(dict(name = 'div', attrs = {'class': 'label-term'})) | ||||||
|  |     remove_tags.append(dict(name =  'div', attrs =  {'class': 'wpg-element Media-Collection-Element-Artikel-Lijst'})) | ||||||
|  |     remove_tags.append(dict(name = 'object')) | ||||||
|  |     remove_tags.append(dict(name = 'link')) | ||||||
|  |     remove_tags.append(dict(name = 'meta')) | ||||||
|  | 
 | ||||||
|  |     def preprocess_html(self, soup): | ||||||
|  |         # Just clean up the result a little | ||||||
|  |         meta = soup.find('div', attrs = {'class': 'meta'}) | ||||||
|  |         if meta: | ||||||
|  |             link = meta.find('span', attrs = {'class': 'link'}) | ||||||
|  |             if link: | ||||||
|  |                 link.extract() | ||||||
|  |             for seperator in meta.findAll('span', attrs = {'class': 'seperator'}): | ||||||
|  |                 seperator.extract() | ||||||
|  | 
 | ||||||
|  |         # Their header is full of 'if IE6/7/8' tags. Just get rid of it altogether | ||||||
|  |         theirHead = soup.head | ||||||
|  |         theirHead.extract() | ||||||
|  |         myHead = Tag(soup, 'head') | ||||||
|  |         soup.insert(0, myHead) | ||||||
|  | 
 | ||||||
|  |         return soup | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
|  | 
 | ||||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user