From 0ec5e231dc63be4f559470cf9a250b2915be7b27 Mon Sep 17 00:00:00 2001 From: Kovid Goyal Date: Tue, 25 Sep 2012 19:31:36 +0530 Subject: [PATCH] Fix #1056178 (Updated recipe for El Pais) --- recipes/elpais_impreso.recipe | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/recipes/elpais_impreso.recipe b/recipes/elpais_impreso.recipe index ffa1033477..2dbd79d094 100644 --- a/recipes/elpais_impreso.recipe +++ b/recipes/elpais_impreso.recipe @@ -41,7 +41,7 @@ class ElPais_RSS(BasicNewsRecipe): ,dict(attrs={'class':['firma','columna_texto','entrevista_p_r']}) ] remove_tags = [ - dict(name=['meta','link','base','iframe','embed','object']) + dict(name=['iframe','embed','object']) ,dict(attrs={'class':'disposicion_vertical'}) ] @@ -74,13 +74,14 @@ class ElPais_RSS(BasicNewsRecipe): ,(u'Justicia y Leyes' , u'http://elpais.com/tag/rss/justicia/a/' ) ,(u'Guerras y conflictos' , u'http://elpais.com/tag/rss/conflictos/a/' ) ,(u'Politica' , u'http://ep00.epimg.net/rss/politica/portada.xml' ) - ,(u'Opinion' , u'http://ep01.epimg.net/rss/politica/opinion.xml' ) + ,(u'Opinion' , u'http://ep01.epimg.net/rss/elpais/opinion.xml' ) ] def get_article_url(self, article): url = BasicNewsRecipe.get_article_url(self, article) if url and (not('/album/' in url) and not('/futbol/partido/' in url)): - return url + urlverified = self.browser.open_novisit(url).geturl() + return urlverified self.log('Skipping non-article', url) return None @@ -107,3 +108,7 @@ class ElPais_RSS(BasicNewsRecipe): for item in soup.findAll('img',alt=False): item['alt'] = 'image' return soup + + def preprocess_raw_html(self, raw, url): + return 'Untitled'+raw[raw.find(''):] + \ No newline at end of file