#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Lorenzo Vigentini & Edwin van Maastrigt'
__copyright__ = '2009, Lorenzo Vigentini and Edwin van Maastrigt '
__description__ = 'Financial news daily paper - v1.02 (30, January 2010)'

'''
http://www.ilsole24ore.com/
'''

from calibre.web.feeds.news import BasicNewsRecipe


class ilsole24Ore(BasicNewsRecipe):
    '''Recipe that builds an e-book from the RSS feeds of ilsole24ore.com.

    Articles are taken from the feeds listed in ``feeds``; each article URL
    is rewritten by ``print_version`` and only the ``div.txt`` element of
    the fetched page is kept.
    '''

    # --- Publication metadata -------------------------------------------
    author = 'Lorenzo Vigentini & Edwin van Maastrigt'
    description = 'Financial news daily paper'

    cover_url = 'http://www.ilsole24ore.com/img2007/print_header.gif'
    title = u'il Sole 24 Ore New'
    publisher = 'italiaNews'
    category = 'News, finance, economy, politics'

    language = 'it'
    timefmt = '[%a, %d %b, %Y]'

    # --- Download limits and clean-up switches --------------------------
    oldest_article = 2
    max_articles_per_feed = 50
    use_embedded_content = False

    remove_javascript = True
    no_stylesheets = True

    def get_article_url(self, article):
        '''Return the URL of a feed entry.

        The entry's ``id`` is preferred; ``guid`` is the fallback and
        ``None`` is returned when neither key is present.
        '''
        return article.get('id', article.get('guid', None))

    def print_version(self, url):
        '''Return *url* without its query string and with ``.shtml``
        rewritten to ``_PRN.shtml``.

        The query string is everything from the last ``?`` onwards. A URL
        that contains no ``?`` is used as-is for the suffix rewrite.
        '''
        link, sep, _params = url.rpartition('?')
        if not sep:
            # rpartition() puts the whole string in the LAST slot when the
            # separator is absent, leaving ``link`` empty; without this
            # fallback every query-less URL would be turned into ''.
            link = url
        return link.replace('.shtml', '_PRN.shtml')

    # Only the element carrying the article text is kept.
    keep_only_tags = [
        dict(name='div', attrs={'class': 'txt'})
    ]
    # remove_tags = [dict(name='br')]

    # (section title, RSS feed URL) pairs.
    feeds = [
        (u'Prima pagina', u'http://www.ilsole24ore.com/rss/primapagina.xml'),
        (u'Norme e tributi', u'http://www.ilsole24ore.com/rss/norme-tributi.xml'),
        (u'Finanza e mercati', u'http://www.ilsole24ore.com/rss/finanza-mercati.xml'),
        (u'Economia e lavoro', u'http://www.ilsole24ore.com/rss/economia-lavoro.xml'),
        (u'Italia', u'http://www.ilsole24ore.com/rss/italia.xml'),
        (u'Mondo', u'http://www.ilsole24ore.com/rss/mondo.xml'),
        (u'Tecnologia e business', u'http://www.ilsole24ore.com/rss/tecnologia-business.xml'),
        (u'Cultura e tempo libero', u'http://www.ilsole24ore.com/rss/tempolibero-cultura.xml'),
        (u'Sport', u'http://www.ilsole24ore.com/rss/sport.xml'),
        (u'Professionisti 24', u'http://www.ilsole24ore.com/rss/prof_home.xml'),
        (u'Ambiente e Sicurezza', u'http://www.ilsole24ore.com/rss/prof_as.xml')
    ]

    # The ``.txt`` rule previously contained a stray '}' after
    # ``padding-bottom:20px`` which closed the rule early and left the
    # text-align/font-family declarations outside of any rule.
    extra_css = '''
        html, body, table, tr, td, h1, h2, h3, h4, h5, h6, p, a, span, br, img {margin:0;padding:0;border:0;font-size:12px;font-family:"Georgia","Times New Roman";}
        .linkHighlight {color:#0292c6;}
        .txt {border-bottom:1px solid #7c7c7c;padding-bottom:20px;text-align:justify;font-family:"serif"}
        .txt p {line-height:18px;}
        .txt span {line-height:22px;}
        .title h3 {color:#7b7b7b;}
        .title h4 {color:#08526e;font-size:26px;font-family:"Times New Roman";font-weight:normal;}
    '''