diff --git a/recipes/icons/marca.png b/recipes/icons/marca.png new file mode 100644 index 0000000000..e9231176b6 Binary files /dev/null and b/recipes/icons/marca.png differ diff --git a/recipes/marca.recipe b/recipes/marca.recipe index 14543b3c0f..1abd8791b6 100644 --- a/recipes/marca.recipe +++ b/recipes/marca.recipe @@ -1,14 +1,11 @@ -#!/usr/bin/env python # -*- coding: utf-8 -*- - __license__ = 'GPL v3' -__copyright__ = '2009, Darko Miletic ' +__copyright__ = '2009-2011, Darko Miletic ' ''' www.marca.com ''' from calibre.web.feeds.news import BasicNewsRecipe -from calibre.ebooks.BeautifulSoup import Tag class Marca(BasicNewsRecipe): title = 'Marca' @@ -22,35 +19,30 @@ class Marca(BasicNewsRecipe): use_embedded_content = False delay = 1 encoding = 'iso-8859-15' - language = 'es' + language = 'es_ES' + publication_type = 'newsportal' + masthead_url = 'http://estaticos.marca.com/deporte/img/v3.0/img_marca-com.png' + extra_css = """ + body{font-family: Tahoma,Geneva,sans-serif} + h1,h2,h3,h4,h5,h6{font-family: 'LatoBlack',Tahoma,Geneva,sans-serif} + .cab_articulo h4 {font-family: Georgia,"Times New Roman",Times,serif} + .antetitulo{text-transform: uppercase} + """ - direction = 'ltr' + feeds = [(u'Portada', u'http://estaticos.marca.com/rss/portada.xml')] - html2lrf_options = [ - '--comment' , description - , '--category' , category - , '--publisher', publisher - ] - - html2epub_options = 'publisher="' + publisher + '"\ncomments="' + description + '"\ntags="' + category + '"' - - feeds = [(u'Portada', u'http://rss.marca.com/rss/descarga.htm?data2=425')] - - keep_only_tags = [dict(name='div', attrs={'class':['cab_articulo','col_izq']})] - - remove_tags = [ - dict(name=['object','link','script']) - ,dict(name='div', attrs={'class':['colC','peu']}) - ,dict(name='div', attrs={'class':['utilidades estirar','bloque_int_corr estirar']}) + keep_only_tags = [dict(name='div', attrs={'class':['cab_articulo','cuerpo_articulo']})] + remove_attributes = ['lang'] + remove_tags = [ + dict(name=['object','link','script','embed','iframe','meta','base']) + ,dict(name='div', attrs={'class':'tabs'}) ] - remove_tags_after = [dict(name='div', attrs={'class':'bloque_int_corr estirar'})] - def preprocess_html(self, soup): - soup.html['dir' ] = self.direction - mcharset = Tag(soup,'meta',[("http-equiv","Content-Type"),("content","text/html; charset=utf-8")]) - soup.head.insert(0,mcharset) for item in soup.findAll(style=True): del item['style'] return soup + def get_article_url(self, article): + return article.get('guid', None) +