mirror of
				https://github.com/kovidgoyal/calibre.git
				synced 2025-10-24 23:38:55 -04:00 
			
		
		
		
	
		
			
				
	
	
		
			70 lines
		
	
	
		
			3.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
			
		
		
	
	
			70 lines
		
	
	
		
			3.3 KiB
		
	
	
	
		
			Python
		
	
	
	
	
	
| #!/usr/bin/env  python
 | |
| # -*- coding: utf-8 -*-
 | |
| 
 | |
| __license__   = 'GPL v3'
 | |
| __copyright__ = '2010, Francisco Javier Nieto <frjanibo at gmail.com>'
 | |
| '''
 | |
| www.hoy.es
 | |
| '''
 | |
| 
 | |
| from calibre.web.feeds.news import BasicNewsRecipe
 | |
| from calibre.ebooks.BeautifulSoup import Tag
 | |
| 
 | |
class Hoy(BasicNewsRecipe):
    """Calibre news recipe for the Spanish regional newspaper HOY (www.hoy.es).

    Articles are pulled from the site's RSS feeds; each article page is
    reduced to its headline, subhead and body text before conversion.
    """

    # --- Publication metadata -------------------------------------------
    title = 'HOY'
    __author__ = 'Fco Javier Nieto'
    description = u'Noticias desde Extremadura'
    publisher = 'HOY'
    category = 'news, politics, Spain, Extremadura'
    language = 'es'

    # --- Download behaviour ---------------------------------------------
    oldest_article = 2            # skip articles older than this many days
    max_articles_per_feed = 100   # upper bound of articles taken per feed
    no_stylesheets = True         # do not fetch the site's own stylesheets
    use_embedded_content = False  # follow the feed links to the full pages
    delay = 1                     # pause between consecutive downloads
    encoding = 'cp1252'           # charset override used to decode pages

    # (section title, RSS feed URL) pairs, in the order they are listed.
    feeds = [
        (u'Portada', u'http://www.hoy.es/portada.xml'),
        (u'Regional', u'http://www.hoy.es/rss/feeds/regional.xml'),
        (u'Prov de Badajoz', u'http://www.hoy.es/rss/feeds/prov_badajoz.xml'),
        (u'Prov de Caceres', u'http://www.hoy.es/rss/feeds/prov_caceres.xml'),
        (u'Badajoz', u'http://www.hoy.es/rss/feeds/badajoz.xml'),
        (u'Caceres', u'http://www.hoy.es/rss/feeds/caceres.xml'),
        (u'Merida', u'http://www.hoy.es/rss/feeds/merida.xml'),
        (u'Opinion', u'http://www.hoy.es/rss/feeds/opinion.xml'),
        (u'Nacional', u'http://www.hoy.es/rss/feeds/nacional.xml'),
        (u'Internacional', u'http://www.hoy.es/rss/feeds/internacional.xml'),
        (u'Economia', u'http://www.hoy.es/rss/feeds/economia.xml'),
        (u'Deportes', u'http://www.hoy.es/rss/feeds/deportes.xml'),
        (u'Sociedad', u'http://www.hoy.es/rss/feeds/sociedad.xml'),
        (u'Cultura', u'http://www.hoy.es/rss/feeds/cultura.xml'),
        (u'Television', u'http://www.hoy.es/rss/feeds/television.xml'),
        (u'contraportada', u'http://www.hoy.es/rss/feeds/contraportada.xml'),
    ]

    # --- Page clean-up rules --------------------------------------------
    # Only these elements of the article page are kept.
    keep_only_tags = [
        dict(name='h1', attrs={'class': ['headline']}),
        dict(name='h2', attrs={'class': ['subhead']}),
        dict(name='div', attrs={'class': ['text']}),
    ]

    # Elements stripped from whatever was kept above.
    remove_tags = [
        dict(name=['object', 'link', 'script']),
        dict(name='div', attrs={'class': ['colC_articulo', 'peu']}),
    ]

    # Everything following the article body container is discarded.
    remove_tags_after = [dict(name='div', attrs={'class': 'text'})]

    # The CSS `font` shorthand requires the size before the family, and the
    # second rule was missing its closing brace; both made the rules invalid.
    extra_css = (
        '.headline {font: 2em sans-serif;}\n'
        '.subhead,h2{font: 1.5em sans-serif;}\n'
    )

    def preprocess_html(self, soup):
        """Normalise a downloaded article page before conversion.

        Sets the text direction on ``<html>``, inserts a UTF-8
        ``Content-Type`` meta tag as the first child of ``<head>`` and
        removes every inline ``style`` attribute.

        :param soup: parsed article page (BeautifulSoup tree).
        :return: the same ``soup`` object, modified in place.
        """
        # NOTE(review): this class defines no ``direction`` attribute; fall
        # back to left-to-right unless the base class or a subclass sets one.
        text_direction = getattr(self, 'direction', 'ltr')

        # Pages lacking <html>/<head> would otherwise raise on None.
        if soup.html is not None:
            soup.html['dir'] = text_direction

        if soup.head is not None:
            charset_meta = Tag(
                soup,
                'meta',
                [
                    ("http-equiv", "Content-Type"),
                    ("content", "text/html; charset=utf-8"),
                ],
            )
            soup.head.insert(0, charset_meta)

        # Inline styles would override the recipe's own extra_css.
        for styled in soup.findAll(style=True):
            del styled['style']

        return soup
 | |
| 
 |