Update Perfil

Fixes #1499860 [Updated recipe for Perfil](https://bugs.launchpad.net/calibre/+bug/1499860)
This commit is contained in:
Kovid Goyal 2015-09-26 08:54:00 +05:30
parent 6ede52c40d
commit 323be26c0f

View File

@ -1,5 +1,5 @@
__license__ = 'GPL v3' __license__ = 'GPL v3'
__copyright__ = '2010-2011, Darko Miletic <darko.miletic at gmail.com>' __copyright__ = '2010-2015, Darko Miletic <darko.miletic at gmail.com>'
''' '''
perfil.com perfil.com
''' '''
@ -15,11 +15,11 @@ class Perfil(BasicNewsRecipe):
oldest_article = 2 oldest_article = 2
max_articles_per_feed = 200 max_articles_per_feed = 200
no_stylesheets = True no_stylesheets = True
encoding = 'cp1252' encoding = 'utf8'
use_embedded_content = False use_embedded_content = False
language = 'es_AR' language = 'es_AR'
remove_empty_feeds = True remove_empty_feeds = True
masthead_url = 'http://www.perfil.com/export/sites/diarioperfil/arte/10/logo_perfilcom_mm.gif' masthead_url = 'http://www.perfil.com/__export/1330013400000/system/modules/com.tfsla.perfil.diario/resources/v1/images/perfilcom-logo-secciones.png'
extra_css = """ extra_css = """
body{font-family: Arial,Helvetica,sans-serif } body{font-family: Arial,Helvetica,sans-serif }
.seccion{border-bottom: 1px dotted #666666; text-transform: uppercase; font-size: x-large} .seccion{border-bottom: 1px dotted #666666; text-transform: uppercase; font-size: x-large}
@ -37,13 +37,11 @@ class Perfil(BasicNewsRecipe):
} }
remove_tags = [ remove_tags = [
dict(name=['iframe','embed','object','base','meta','link']) dict(name=['iframe','embed','object','base','meta','link'])
,dict(name='a', attrs={'href':'#comentarios'}) ,dict(attrs={'id':['social-articulo','relacionadas']})
,dict(name='div', attrs={'class':'foto3'}) ,dict(attrs={'class':'destacadoNota'})
,dict(name='img', attrs={'alt':['ampliar','Ampliar']})
] ]
keep_only_tags=[dict(attrs={'class':['articulo','cuerpoSuperior']})] keep_only_tags=[dict(attrs={'id':'header-noticia'}), dict(attrs={'class':'cuerpo'})]
remove_attributes=['onload','lang','width','height','border']
feeds = [ feeds = [
(u'Ultimo momento' , u'http://www.perfil.com/rss/ultimomomento.xml') (u'Ultimo momento' , u'http://www.perfil.com/rss/ultimomomento.xml')
@ -60,27 +58,3 @@ class Perfil(BasicNewsRecipe):
,(u'Salud' , u'http://www.perfil.com/rss/salud.xml' ) ,(u'Salud' , u'http://www.perfil.com/rss/salud.xml' )
,(u'Tecnologia' , u'http://www.perfil.com/rss/tecnologia.xml' ) ,(u'Tecnologia' , u'http://www.perfil.com/rss/tecnologia.xml' )
] ]
def get_article_url(self, article):
    """Return the address to fetch for a single feed entry.

    The entry's ``guid`` field is used as the article URL.

    :param article: feed entry supporting dict-style ``.get``.
    :return: the value stored under ``'guid'``, or ``None`` when the
        entry has no such key.
    """
    return article.get('guid', None)
def preprocess_html(self, soup):
    """Simplify the parsed article markup and return the same soup.

    Three passes are made over the document:

    1. every inline ``style`` attribute is deleted;
    2. every ``<a>`` element is flattened: a link holding a single
       string is replaced by that string, a link wrapping an image is
       turned into an attribute-less ``div``, and any other link is
       replaced by its text as produced by ``self.tag_to_string``;
    3. every ``<img>`` lacking an ``alt`` attribute gets ``alt="image"``.

    :param soup: parsed document exposing ``findAll``.
    :return: the same ``soup`` object, modified in place.
    """
    for item in soup.findAll(style=True):
        del item['style']

    for item in soup.findAll('a'):
        limg = item.find('img')
        if item.string is not None:
            # Plain text link: keep only the text.
            item.replaceWith(item.string)
        elif limg:
            # Image link: keep the image, drop the anchor semantics.
            item.name = 'div'
            # NOTE(review): assigning a list assumes the parser stores
            # attributes as a list -- confirm for the soup class in use.
            item.attrs = []
        else:
            item.replaceWith(self.tag_to_string(item))

    for item in soup.findAll('img'):
        if not item.has_key('alt'):
            item['alt'] = 'image'

    return soup