#!/usr/bin/env python
# -*- coding: utf-8 -*-
#
# Provenance: this recipe was added as resources/recipes/hoy.recipe by commit
# c161c493ec588c61d87055001e14bc99ae987aa1 (Kovid Goyal,
# Fri, 3 Sep 2010 09:41:01 -0600), subject "HOY by Fco Javier Nieto".

__license__ = 'GPL v3'
__copyright__ = '2010, Francisco Javier Nieto '
'''
www.hoy.es
'''

from calibre.web.feeds.news import BasicNewsRecipe
from calibre.ebooks.BeautifulSoup import Tag


class Hoy(BasicNewsRecipe):
    """News recipe for the HOY newspaper site (www.hoy.es).

    The recipe is purely declarative apart from ``preprocess_html``: it lists
    the RSS feeds to read, which tags of each article page to keep or drop,
    and a small stylesheet for the headline and subhead.
    """

    # --- Publication metadata -------------------------------------------
    title = 'HOY'
    __author__ = 'Fco Javier Nieto'
    description = u'Noticias desde Extremadura'
    publisher = 'HOY'
    category = 'news, politics, Spain, Extremadura'
    language = 'es'

    # --- Download settings (interpreted by BasicNewsRecipe) -------------
    oldest_article = 2
    max_articles_per_feed = 100
    no_stylesheets = True
    use_embedded_content = False
    delay = 1
    encoding = 'cp1252'

    # (section title, RSS feed URL) pairs, in the order they are listed.
    feeds = [
        (u'Portada',         u'http://www.hoy.es/portada.xml'),
        (u'Regional',        u'http://www.hoy.es/rss/feeds/regional.xml'),
        (u'Prov de Badajoz', u'http://www.hoy.es/rss/feeds/prov_badajoz.xml'),
        (u'Prov de Caceres', u'http://www.hoy.es/rss/feeds/prov_caceres.xml'),
        (u'Badajoz',         u'http://www.hoy.es/rss/feeds/badajoz.xml'),
        (u'Caceres',         u'http://www.hoy.es/rss/feeds/caceres.xml'),
        (u'Merida',          u'http://www.hoy.es/rss/feeds/merida.xml'),
        (u'Opinion',         u'http://www.hoy.es/rss/feeds/opinion.xml'),
        (u'Nacional',        u'http://www.hoy.es/rss/feeds/nacional.xml'),
        (u'Internacional',   u'http://www.hoy.es/rss/feeds/internacional.xml'),
        (u'Economia',        u'http://www.hoy.es/rss/feeds/economia.xml'),
        (u'Deportes',        u'http://www.hoy.es/rss/feeds/deportes.xml'),
        (u'Sociedad',        u'http://www.hoy.es/rss/feeds/sociedad.xml'),
        (u'Cultura',         u'http://www.hoy.es/rss/feeds/cultura.xml'),
        (u'Television',      u'http://www.hoy.es/rss/feeds/television.xml'),
        (u'contraportada',   u'http://www.hoy.es/rss/feeds/contraportada.xml'),
    ]

    # Article pages are reduced to headline, subhead and body text.
    keep_only_tags = [
        dict(name='h1', attrs={'class': ['headline']}),
        dict(name='h2', attrs={'class': ['subhead']}),
        dict(name='div', attrs={'class': ['text']}),
    ]

    # Embedded objects, <link>/<script> elements and two site-specific
    # <div> classes are stripped from whatever was kept.
    remove_tags = [
        dict(name=['object', 'link', 'script']),
        dict(name='div', attrs={'class': ['colC_articulo', 'peu']}),
    ]

    remove_tags_after = [dict(name='div', attrs={'class': 'text'})]

    # The original stylesheet never closed the second rule and wrote the
    # `font` shorthand as "<family> <size>"; the shorthand requires the size
    # first and the family last, so both declarations were invalid CSS.
    extra_css = (
        '.headline {font: 2em sans-serif;}\n'
        '.subhead,h2{font: 1.5em sans-serif;}\n'
    )

    def preprocess_html(self, soup):
        """Normalise an article page before conversion.

        Sets the text direction on ``<html>``, inserts a UTF-8
        ``Content-Type`` ``<meta>`` as the first child of ``<head>``, and
        removes every inline ``style`` attribute.

        :param soup: parsed article page (BeautifulSoup tree).
        :return: the same ``soup`` object, modified in place.
        """
        # This recipe does not define `direction`; use the attribute if the
        # base class (or a subclass) provides one, otherwise fall back to
        # left-to-right instead of raising AttributeError.
        text_direction = getattr(self, 'direction', 'ltr')

        # Pages lacking <html> or <head> are passed through rather than
        # aborting the whole download.
        if soup.html is not None:
            soup.html['dir'] = text_direction

        if soup.head is not None:
            charset_meta = Tag(
                soup,
                'meta',
                [
                    ("http-equiv", "Content-Type"),
                    ("content", "text/html; charset=utf-8"),
                ],
            )
            soup.head.insert(0, charset_meta)

        # Inline styles are dropped from every tag that carries one.
        for styled_tag in soup.findAll(style=True):
            del styled_tag['style']

        return soup