# Patch for recipes/clarin.recipe (class Clarin, a BasicNewsRecipe subclass).
#
# Changes visible in the hunks below:
#   * __copyright__: year range changed from 2008-2010 to 2008-2011.
#   * New class attribute `delay = 1`.
#   * `extra_css` rewritten from a one-line single-quoted string to a
#     triple-quoted string listing the same five CSS rules.
#   * `keep_only_tags` is removed and re-added; the two versions read
#     identically here, so this is presumably a whitespace/alignment-only
#     change -- TODO confirm against the original patch.
#   * New `remove_tags` (tag names meta, base, link) and `remove_attributes`
#     (lang, _mce_bogus).
#   * New method `get_article_url(self, article)` returning
#     `article.get('guid', None)`; `print_version` appears only as unchanged
#     context.
#
# NOTE(review): every line break of the patch has been collapsed into a
# single space (hunk headers, context lines and +/- lines all share one
# physical line), so this text cannot be applied as-is. The original
# newlines, indentation and blank lines must be restored first; they cannot
# be recovered reliably from this view.
# NOTE(review): both __copyright__ strings end in "Darko Miletic " with a
# trailing space -- possibly an e-mail address was stripped; verify upstream.
diff --git a/recipes/clarin.recipe b/recipes/clarin.recipe index 7bbb663d1d..8793387865 100644 --- a/recipes/clarin.recipe +++ b/recipes/clarin.recipe @@ -1,6 +1,6 @@ __license__ = 'GPL v3' -__copyright__ = '2008-2010, Darko Miletic ' +__copyright__ = '2008-2011, Darko Miletic ' ''' clarin.com ''' @@ -18,11 +18,18 @@ class Clarin(BasicNewsRecipe): use_embedded_content = False no_stylesheets = True encoding = 'utf8' + delay = 1 language = 'es_AR' publication_type = 'newspaper' INDEX = 'http://www.clarin.com' masthead_url = 'http://www.clarin.com/static/CLAClarin/images/logo-clarin-print.jpg' - extra_css = ' body{font-family: Arial,Helvetica,sans-serif} h2{font-family: Georgia,serif; font-size: xx-large} .hora{font-weight:bold} .hd p{font-size: small} .nombre-autor{color: #0F325A} ' + extra_css = """ + body{font-family: Arial,Helvetica,sans-serif} + h2{font-family: Georgia,serif; font-size: xx-large} + .hora{font-weight:bold} + .hd p{font-size: small} + .nombre-autor{color: #0F325A} + """ conversion_options = { 'comment' : description @@ -31,7 +38,9 @@ class Clarin(BasicNewsRecipe): , 'language' : language } - keep_only_tags = [dict(attrs={'class':['hd','mt']})] + keep_only_tags = [dict(attrs={'class':['hd','mt']})] + remove_tags = [dict(name=['meta','base','link'])] + remove_attributes = ['lang','_mce_bogus'] feeds = [ (u'Pagina principal', u'http://www.clarin.com/rss/' ) @@ -47,6 +56,10 @@ class Clarin(BasicNewsRecipe): ,(u'Ciudades' , u'http://www.clarin.com/rss/ciudades/' ) ] + + def get_article_url(self, article): + return article.get('guid', None) + def print_version(self, url): return url + '?print=1'