#!/usr/bin/env python2
__license__ = 'GPL v3'
__author__ = 'Lorenzo Vigentini'
__copyright__ = '2009, Lorenzo Vigentini '
__version__ = 'v1.02'
__date__ = '10, January 2010'
__description__ = 'Sport news from the most read sport newspaper in Italy'

'''www.gazzetta.it'''

from calibre.web.feeds.news import BasicNewsRecipe


class laGazzetta(BasicNewsRecipe):
    """Recipe that downloads the sport RSS feeds published by www.gazzetta.it.

    Articles are reduced to the ``div`` with id ``articolo`` and fetched
    through the URL computed by :meth:`print_version`.
    """

    __author__ = 'Lorenzo Vigentini'
    description = 'Sport news from the most read sport newspaper in Italy'

    cover_url = 'http://www.gazzetta.it/primapagina/images/prima_pagina_grande.png'

    title = 'La Gazzetta dello Sport '
    publisher = 'RCS Digital'
    category = 'Sport News'

    language = 'it'
    encoding = 'cp1252'
    timefmt = '[%a, %d %b, %Y]'

    oldest_article = 2
    max_articles_per_feed = 20
    use_embedded_content = False
    # NOTE(review): the link-following depth attribute documented for
    # BasicNewsRecipe appears to be spelled ``recursions``; confirm whether
    # this name is intentional before renaming, since that would change how
    # many pages are crawled.
    recursion = 10

    remove_javascript = True
    no_stylesheets = True

    # Only the article container is kept; everything listed in remove_tags
    # is stripped from it afterwards.
    keep_only_tags = [dict(name='div', attrs={'id': 'articolo'})]

    remove_tags = [
        dict(name='ul', attrs={'id': ['service-toolbar', 'sections-menu']}),
        dict(name='div', attrs={'id': [
            'header', 'rightcol', 'sponsored', 'vxFlashPlayer', 'footer',
            'print-box']}),
        dict(name='iframe', attrs={'id': 'mirago-feed'}),
        dict(name='a', attrs={'id': 'commenta-up'}),
        dict(name='cite', attrs={'class': ['signature', 'parag-title']}),
        dict(name='a', attrs={'class': ['last-comment', 'button-bold2']}),
        dict(name=['base', 'object', 'link', 'a', 'script', 'noscript'])
    ]

    extra_css = '''
                h1 {font: sans-serif large;}
                h2 {font: sans-serif medium;}
                h3 {font: sans-serif small;}
                h4 {font: sans-serif bold small;}
                p {font:10pt helvetica}
                dd {font:8pt helvetica}
                '''

    # (section title, RSS feed URL) pairs.
    feeds = [
        (u'Calcio', u'http://www.gazzetta.it/rss/Calcio.xml'),
        (u'Formula 1', u'http://www.gazzetta.it/rss/Formula1.xml'),
        (u'Motomondiale', u'http://www.gazzetta.it/rss/Motomondiale.xml'),
        (u'Motori', u'http://www.gazzetta.it/rss/Motori.xml'),
        (u'Ciclismo', u'http://www.gazzetta.it/rss/Ciclismo.xml'),
        (u'Basket', u'http://www.gazzetta.it/rss/Basket.xml'),
        (u'Tennis', u'http://www.gazzetta.it/rss/Tennis.xml'),
        (u'Pallavolo', u'http://www.gazzetta.it/rss/Pallavolo.xml'),
        (u'Vela', u'http://www.gazzetta.it/rss/Vela.xml'),
        (u'Atletica', u'http://www.gazzetta.it/rss/Atletica.xml'),
        (u'Altri Sport', u'http://www.gazzetta.it/rss/Sport_Vari.xml')
    ]

    def print_version(self, url):
        """Return the ``_print.html`` URL derived from an article URL.

        The result is built from three pieces of ``url`` split on ``/``:
        the first three segments (scheme and host), the next four path
        segments, and the final segment with its last six characters and
        anything after its first ``.`` removed, followed by ``_print.html``.

        :param url: article URL as a string.
        :return: the rewritten URL as a string.
        """
        segments = url.split('/')
        basename = '/'.join(segments[:3]) + '/'
        subPath = '/'.join(segments[3:7]) + '/'
        # Drop the trailing six characters of the file name (presumably a
        # '.shtml' extension), then keep only the part before the first dot.
        articleURL = segments[-1][:-6]
        myArticle = articleURL.split('.')[0]
        printVerString = myArticle + '_print.html'
        return basename + subPath + printVerString