mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-10-30 18:22:25 -04:00 
			
		
		
		
	New recipe for Vrij Nederland by kwetal
This commit is contained in:
		
							parent
							
								
									9c59b44a7e
								
							
						
					
					
						commit
						7ebdad563a
					
				| @ -62,7 +62,7 @@ | ||||
|     - title: The Economist (no subscription required) | ||||
|       author: Kovid Goyal | ||||
| 
 | ||||
|     - title: Sports Illustrated1 | ||||
|     - title: Sports Illustrated | ||||
|       author: kwetal | ||||
| 
 | ||||
|     - title: Levante | ||||
|  | ||||
							
								
								
									
										76
									
								
								resources/recipes/vrijnederland.recipe
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										76
									
								
								resources/recipes/vrijnederland.recipe
									
									
									
									
									
										Normal file
									
								
							| @ -0,0 +1,76 @@ | ||||
| from calibre.web.feeds.news import BasicNewsRecipe | ||||
| from calibre.ebooks.BeautifulSoup import Tag | ||||
| 
 | ||||
class VrijNederlandRecipe(BasicNewsRecipe):
    """Recipe for Vrij Nederland, a weekly opinion magazine from the Netherlands.

    Articles are collected from the per-section and per-columnist RSS feeds
    listed in ``feeds``. Only the main article column is kept, and reader
    forms, media terms and embedded objects are removed.
    """

    __license__ = 'GPL v3'
    __author__ = 'kwetal'
    language = 'nl_NL'
    locale = 'nl_NL'
    version = 1

    title = u'Vrij Nederland'
    publisher = u'Weekbladpers Tijdschriften'
    category = u'News, Opinion'
    description = u'Weekly opinion magazine from the Netherlands'

    oldest_article = 7
    max_articles_per_feed = 100
    use_embedded_content = False

    no_stylesheets = True
    remove_javascript = True
    # Does not seem to work
    #extra_css = '''li.calibre2 {padding-bottom: 40px}'''

    conversion_options = {'publisher': publisher, 'tags': category, 'comments': description}

    # (feed title, feed URL) pairs: site sections first, then columnists.
    feeds = [
        (u'Politiek', u'http://www.vn.nl/politiek.rss'),
        (u'Buitenland', u'http://www.vn.nl/buitenland.rss'),
        (u'Economie', u'http://www.vn.nl/economie.rss'),
        (u'Justitie', u'http://www.vn.nl/justitie.rss'),
        (u'Samenleving', u'http://www.vn.nl/samenleving.rss'),
        (u'Crime', u'http://www.vn.nl/crime.rss'),
        (u'Media', u'http://www.vn.nl/media.rss'),
        (u'De Republiek der Letteren', u'http://www.vn.nl/republiek.rss'),
        (u'Max van Weezel', u'http://www.vn.nl/vanweezel.rss'),
        (u'Ko Colijn', u'http://www.vn.nl/colijn.rss'),
        (u'Kees Kraaijeveld', u'http://www.vn.nl/kraaijeveld.rss'),
        (u'Frank Kalshoven', u'http://www.vn.nl/kalshoven.rss'),
        (u'Stephan Sanders', u'http://www.vn.nl/sanders.rss'),
        (u'Micha Wertheim', u'http://www.vn.nl/wertheim.rss'),
        (u'Arnon Grunberg', u'http://www.vn.nl/grunberg.rss'),
        (u'Carel Peeters', u'http://www.vn.nl/carelpeeters.rss'),
    ]

    # Keep only the main article column.
    keep_only_tags = [dict(name='div', attrs={'class': 'cl-column column-one'})]

    # Elements stripped from the kept column.
    remove_tags = [
        dict(name='div', attrs={'class': 'wpg-element guest-book-overview'}),
        dict(name='div', attrs={'class': 'wpg-element forum-message-form'}),
        dict(name='div', attrs={'class': 'mediaterms'}),
        dict(name='div', attrs={'class': 'label-term'}),
        dict(name='div', attrs={'class': 'wpg-element Media-Collection-Element-Artikel-Lijst'}),
        dict(name='object'),
        dict(name='link'),
        dict(name='meta'),
    ]

    def preprocess_html(self, soup):
        """Tidy the article markup and swap the page header for an empty one.

        In the first ``div`` with class ``meta`` (if any), the ``link`` span
        and every ``seperator`` span are removed. The document's ``head`` is
        then discarded and a fresh, empty ``head`` tag is inserted at
        position 0 of the soup.

        Returns the same ``soup`` object, modified in place.
        """
        # Just clean up the result a little
        meta_div = soup.find('div', attrs={'class': 'meta'})
        if meta_div:
            link_span = meta_div.find('span', attrs={'class': 'link'})
            if link_span:
                link_span.extract()
            # 'seperator' (sic) is the class name looked up in the page markup.
            for separator_span in meta_div.findAll('span', attrs={'class': 'seperator'}):
                separator_span.extract()

        # Their header is full of 'if IE6/7/8' tags. Just get rid of it altogether
        original_head = soup.head
        # A page without a <head> yields None here; skip the removal instead
        # of raising AttributeError and losing the whole article.
        if original_head is not None:
            original_head.extract()
        soup.insert(0, Tag(soup, 'head'))

        return soup
| 
 | ||||
| 
 | ||||
| 
 | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user