mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-10-31 10:37:00 -04:00 
			
		
		
		
	
		
			
				
	
	
		
			65 lines
		
	
	
		
			3.1 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
			
		
		
	
	
			65 lines
		
	
	
		
			3.1 KiB
		
	
	
	
		
			Plaintext
		
	
	
	
	
	
| __license__   = 'GPL v3'
 | |
| __copyright__ = '2010, Darko Miletic <darko.miletic at gmail.com>'
 | |
| '''
 | |
| elsevier.nl
 | |
| '''
 | |
| 
 | |
| from calibre.web.feeds.news import BasicNewsRecipe
 | |
| 
 | |
class Pagina12(BasicNewsRecipe):
    """Calibre news recipe for the Dutch news site elsevier.nl.

    The recipe is purely declarative apart from three small hooks:
    ``get_article_url`` derives the article URL from the feed entry's guid,
    ``print_version`` maps that URL to the printer-friendly page, and
    ``preprocess_html`` strips inline ``style`` attributes.

    NOTE(review): the class name looks like a leftover from another recipe;
    it is kept unchanged so anything referring to it by name keeps working.
    """

    # --- Metadata -----------------------------------------------------------
    title                 = 'Elsevier.nl'
    __author__            = 'Darko Miletic'
    description           = 'News from Holland'
    publisher             = 'elsevier.nl'
    category              = 'news, politics, Holland'
    language              = 'nl'
    country               = 'NL'

    # --- Fetch options (semantics per calibre's BasicNewsRecipe API) --------
    oldest_article        = 2
    max_articles_per_feed = 200
    no_stylesheets        = True
    encoding              = 'utf-8'
    use_embedded_content  = False
    remove_empty_feeds    = True
    masthead_url          = 'http://www.elsevier.nl/static/elsevier/stdimg/logo.gif'
    extra_css             = ' body{font-family: Arial,Helvetica,sans-serif } img{margin-bottom: 0.4em}  '

    # The conversion metadata simply mirrors the attributes declared above.
    conversion_options = {
        'comment': description,
        'tags': category,
        'publisher': publisher,
        'language': language,
    }

    # --- Page clean-up ------------------------------------------------------
    # calibre documents keep_only_tags as a *list* of tag specifications, so
    # the single spec is wrapped in a list rather than passed as a bare dict.
    keep_only_tags     = [dict(attrs={'id': 'artikel_container'})]
    remove_tags_before = dict(attrs={'id': 'breadcrumb_container'})
    remove_tags_after  = dict(attrs={'class': 'author_link'})
    remove_tags        = [
        dict(attrs={'id': 'breadcrumb_container'}),
        dict(name='div', attrs={'class': 'pullout_vak'}),
    ]
    remove_attributes  = ['width', 'height']

    # --- Feeds: (title, RSS URL) pairs --------------------------------------
    # NOTE(review): 'Internet&/Gadgets' and 'Comentaren' look like typos in the
    # displayed feed titles; they are reproduced unchanged here.
    feeds = [
        (u'Laatste nieuws', u'http://www.elsevier.nl/web/RSS/Homepage-RSS.htm?output=xml'),
        (u'Nederland', u'http://www.elsevier.nl/web/RSS/Nederland-RSS.htm?output=xml'),
        (u'Politiek', u'http://www.elsevier.nl/web/RSS/Politiek-RSS.htm?output=xml'),
        (u'Europese Unie', u'http://www.elsevier.nl/web/RSS/Europese-Unie-RSS.htm?output=xml'),
        (u'Buitenland', u'http://www.elsevier.nl/web/RSS/Buitenland-RSS.htm?output=xml'),
        (u'Economie', u'http://www.elsevier.nl/web/RSS/Economie-RSS.htm?output=xml'),
        (u'Wetenschap', u'http://www.elsevier.nl/web/RSS/Wetenschap-RSS.htm?output=xml'),
        (u'Cultuur & Televisie', u'http://www.elsevier.nl/web/RSS/Cultuur-Televisie-RSS.htm?output=xml'),
        (u'Society', u'http://www.elsevier.nl/web/RSS/Society-RSS.htm?output=xml'),
        (u'Internet&/Gadgets', u'http://www.elsevier.nl/web/RSS/Internet-Gadgets-RSS.htm?output=xml'),
        (u'Comentaren', u'http://www.elsevier.nl/web/RSS/Commentaren-RSS.htm?output=xml'),
    ]

    def print_version(self, url):
        """Return the printer-friendly URL for the article at ``url``.

        ``url`` is expected to carry no query string (``get_article_url``
        strips it), so the print flag is appended as the sole parameter.
        """
        return url + '?print=true'

    def get_article_url(self, article):
        """Return the article URL taken from the feed entry's ``guid``.

        Everything from the last ``?`` onwards is dropped. A guid without a
        ``?`` is returned unchanged; previously ``rpartition`` produced an
        empty string in that case. A missing or empty guid yields ``None``
        instead of raising ``AttributeError``.

        NOTE(review): this assumes calibre skips entries whose URL is falsy;
        confirm against the feed-parsing code.
        """
        guid = article.get('guid', None)
        if not guid:
            return None
        base, separator, _query = guid.rpartition('?')
        # rpartition puts the whole string in the *last* slot when the
        # separator is absent, so only trust ``base`` if a '?' was found.
        return base if separator else guid

    def preprocess_html(self, soup):
        """Remove every inline ``style`` attribute from ``soup`` and return it."""
        for styled_tag in soup.findAll(style=True):
            del styled_tag['style']
        return soup