diff --git a/recipes/icons/la_republica.png b/recipes/icons/la_republica.png new file mode 100644 index 0000000000..e24405b6d4 Binary files /dev/null and b/recipes/icons/la_republica.png differ diff --git a/recipes/la_republica.recipe b/recipes/la_republica.recipe index 14dc0d0aef..e55211c223 100644 --- a/recipes/la_republica.recipe +++ b/recipes/la_republica.recipe @@ -1,51 +1,55 @@ -#!/usr/bin/env python __license__ = 'GPL v3' __author__ = 'Lorenzo Vigentini, based on Darko Miletic, Gabriele Marini' -__copyright__ = '2009, Darko Miletic , Lorenzo Vigentini ' +__copyright__ = '2009-2011, Darko Miletic , Lorenzo Vigentini ' description = 'Italian daily newspaper - v1.01 (04, January 2010); 16.05.2010 new version' ''' http://www.repubblica.it/ ''' +import re from calibre.web.feeds.news import BasicNewsRecipe class LaRepubblica(BasicNewsRecipe): - __author__ = 'Lorenzo Vigentini, Gabriele Marini' - description = 'Italian daily newspaper' - - cover_url = 'http://www.repubblica.it/images/homepage/la_repubblica_logo.gif' - title = u'La Repubblica' - publisher = 'Gruppo editoriale L\'Espresso' - category = 'News, politics, culture, economy, general interest' - - language = 'it' - timefmt = '[%a, %d %b, %Y]' - - oldest_article = 5 - max_articles_per_feed = 100 - use_embedded_content = False - recursion = 10 - - remove_javascript = True - no_stylesheets = True + title = 'La Repubblica' + __author__ = 'Lorenzo Vigentini, Gabriele Marini, Darko Miletic' + description = 'il quotidiano online con tutte le notizie in tempo reale. News e ultime notizie. Tutti i settori: politica, cronaca, economia, sport, esteri, scienza, tecnologia, internet, spettacoli, musica, cultura, arte, mostre, libri, dvd, vhs, concerti, cinema, attori, attrici, recensioni, chat, cucina, mappe. Le citta di Repubblica: Roma, Milano, Bologna, Firenze, Palermo, Napoli, Bari, Torino.' + masthead_url = 'http://www.repubblica.it/static/images/homepage/2010/la-repubblica-logo-home-payoff.png' + publisher = 'Gruppo editoriale L\'Espresso' + category = 'News, politics, culture, economy, general interest' + language = 'it' + timefmt = '[%a, %d %b, %Y]' + oldest_article = 5 + encoding = 'utf8' + use_embedded_content = False + #recursion = 10 + no_stylesheets = True + extra_css = """ + img{display: block} + """ + preprocess_regexps = [ + (re.compile(r'.*?', re.DOTALL|re.IGNORECASE), lambda match: ''), + (re.compile(r'.*?', re.DOTALL|re.IGNORECASE), lambda match: '<head><title>'), + (re.compile(r'.*?', re.DOTALL|re.IGNORECASE), lambda match: '') + ] + def get_article_url(self, article): link = article.get('id', article.get('guid', None)) if link is None: return article return link - - keep_only_tags = [dict(name='div', attrs={'class':'articolo'}), - dict(name='div', attrs={'class':'body-text'}), -# dict(name='div', attrs={'class':'page-content'}), + + keep_only_tags = [ + dict(attrs={'class':'articolo'}), + dict(attrs={'class':'body-text'}), dict(name='p', attrs={'class':'disclaimer clearfix'}), - dict(name='div', attrs={'id':'contA'}) + dict(attrs={'id':'contA'}) ] remove_tags = [ - dict(name=['object','link']), + dict(name=['object','link','meta']), dict(name='span',attrs={'class':'linkindice'}), dict(name='div', attrs={'class':'bottom-mobile'}), dict(name='div', attrs={'id':['rssdiv','blocco']}),