mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Fix #1056178 (Updated recipe for El Pais)
This commit is contained in:
parent
05b0a097ac
commit
0ec5e231dc
@ -41,7 +41,7 @@ class ElPais_RSS(BasicNewsRecipe):
|
|||||||
,dict(attrs={'class':['firma','columna_texto','entrevista_p_r']})
|
,dict(attrs={'class':['firma','columna_texto','entrevista_p_r']})
|
||||||
]
|
]
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(name=['meta','link','base','iframe','embed','object'])
|
dict(name=['iframe','embed','object'])
|
||||||
,dict(attrs={'class':'disposicion_vertical'})
|
,dict(attrs={'class':'disposicion_vertical'})
|
||||||
]
|
]
|
||||||
|
|
||||||
@ -74,13 +74,14 @@ class ElPais_RSS(BasicNewsRecipe):
|
|||||||
,(u'Justicia y Leyes' , u'http://elpais.com/tag/rss/justicia/a/' )
|
,(u'Justicia y Leyes' , u'http://elpais.com/tag/rss/justicia/a/' )
|
||||||
,(u'Guerras y conflictos' , u'http://elpais.com/tag/rss/conflictos/a/' )
|
,(u'Guerras y conflictos' , u'http://elpais.com/tag/rss/conflictos/a/' )
|
||||||
,(u'Politica' , u'http://ep00.epimg.net/rss/politica/portada.xml' )
|
,(u'Politica' , u'http://ep00.epimg.net/rss/politica/portada.xml' )
|
||||||
,(u'Opinion' , u'http://ep01.epimg.net/rss/politica/opinion.xml' )
|
,(u'Opinion' , u'http://ep01.epimg.net/rss/elpais/opinion.xml' )
|
||||||
]
|
]
|
||||||
|
|
||||||
def get_article_url(self, article):
|
def get_article_url(self, article):
|
||||||
url = BasicNewsRecipe.get_article_url(self, article)
|
url = BasicNewsRecipe.get_article_url(self, article)
|
||||||
if url and (not('/album/' in url) and not('/futbol/partido/' in url)):
|
if url and (not('/album/' in url) and not('/futbol/partido/' in url)):
|
||||||
return url
|
urlverified = self.browser.open_novisit(url).geturl()
|
||||||
|
return urlverified
|
||||||
self.log('Skipping non-article', url)
|
self.log('Skipping non-article', url)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@ -107,3 +108,7 @@ class ElPais_RSS(BasicNewsRecipe):
|
|||||||
for item in soup.findAll('img',alt=False):
|
for item in soup.findAll('img',alt=False):
|
||||||
item['alt'] = 'image'
|
item['alt'] = 'image'
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
|
def preprocess_raw_html(self, raw, url):
|
||||||
|
return '<html><head><title>Untitled</title>'+raw[raw.find('</head>'):]
|
||||||
|
|
Loading…
x
Reference in New Issue
Block a user