#!/usr/bin/env python
__license__ = 'GPL v3'
__author__ = 'Lorenzo Vigentini'
__copyright__ = '2009, Lorenzo Vigentini '
__version__ = 'v1.02'
__date__ = '10, January 2010'
__description__ = 'Sport news from the most read sport newspaper in Italy'

'''www.gazzetta.it'''

from calibre.web.feeds.news import BasicNewsRecipe


class laGazzetta(BasicNewsRecipe):
    """Calibre news recipe for La Gazzetta dello Sport (www.gazzetta.it).

    The recipe reads the site's per-sport RSS feeds listed in ``feeds`` and
    rewrites each article link to its ``_print.html`` variant through
    ``print_version``.
    """

    __author__ = 'Lorenzo Vigentini'
    description = 'Sport news from the most read sport newspaper in Italy'
    cover_url = 'http://www.gazzetta.it/primapagina/images/prima_pagina_grande.png'

    title = 'La Gazzetta dello Sport '
    publisher = 'RCS Digital'
    category = 'Sport News'

    language = 'it'
    encoding = 'cp1252'
    timefmt = '[%a, %d %b, %Y]'

    oldest_article = 2
    max_articles_per_feed = 20
    use_embedded_content = False
    recursion = 10

    remove_javascript = True
    no_stylesheets = True

    # Only the element holding the article body is kept.
    keep_only_tags = [
        dict(name='div', attrs={'id': 'articolo'}),
    ]

    # Navigation, advertising, comment links and embedded objects are dropped.
    remove_tags = [
        dict(name='ul', attrs={'id': ['service-toolbar', 'sections-menu']}),
        dict(name='div', attrs={'id': ['header', 'rightcol', 'sponsored', 'vxFlashPlayer', 'footer', 'print-box']}),
        dict(name='iframe', attrs={'id': 'mirago-feed'}),
        dict(name='a', attrs={'id': 'commenta-up'}),
        dict(name='cite', attrs={'class': ['signature', 'parag-title']}),
        dict(name='a', attrs={'class': ['last-comment', 'button-bold2']}),
        dict(name=['base', 'object', 'link', 'a', 'script', 'noscript']),
    ]

    # Split across adjacent literals purely for readability; the resulting
    # string is a single line of CSS rules separated by spaces.
    extra_css = (
        ' h1 {font: sans-serif large;}'
        ' h2 {font: sans-serif medium;}'
        ' h3 {font: sans-serif small;}'
        ' h4 {font: sans-serif bold small;}'
        ' p {font:10pt helvetica}'
        ' dd {font:8pt helvetica} '
    )

    # (section title, RSS feed URL) pairs.
    feeds = [
        (u'Calcio', u'http://www.gazzetta.it/rss/Calcio.xml'),
        (u'Formula 1', u'http://www.gazzetta.it/rss/Formula1.xml'),
        (u'Motomodiale', u'http://www.gazzetta.it/rss/Motomondiale.xml'),
        (u'Motori', u'http://www.gazzetta.it/rss/Motori.xml'),
        (u'Ciclismo', u'http://www.gazzetta.it/rss/Ciclismo.xml'),
        (u'Basket', u'http://www.gazzetta.it/rss/Basket.xml'),
        (u'Tennis', u'http://www.gazzetta.it/rss/Tennis.xml'),
        (u'Pallavolo', u'http://www.gazzetta.it/rss/Pallavolo.xml'),
        (u'Vela', u'http://www.gazzetta.it/rss/Vela.xml'),
        (u'Atletica', u'http://www.gazzetta.it/rss/Atletica.xml'),
        (u'Altri Sport', u'http://www.gazzetta.it/rss/Sport_Vari.xml'),
    ]

    def print_version(self, url):
        """Return the printer-friendly URL derived from an article URL.

        The URL is split on ``/``. The scheme and host (first three
        segments) and the next four path segments are kept, and the final
        segment has its last six characters removed (the length of
        ``.shtml``) and is cut at the first remaining dot before
        ``_print.html`` is appended. For example::

            http://www.gazzetta.it/a/b/c/d/story.shtml
            -> http://www.gazzetta.it/a/b/c/d/story_print.html

        NOTE(review): assumes article URLs always have exactly four
        directory levels and a six-character extension - confirm against
        the current feeds.

        :param url: article URL as found in the feed.
        :return: URL of the ``_print.html`` page for that article.
        """
        segments = url.split('/')
        base_url = '/'.join(segments[:3]) + '/'
        sub_path = '/'.join(segments[3:7]) + '/'

        # Drop the 6-character extension, then anything after the first dot.
        article_name = segments[-1][:-6].split('.')[0]

        print_url = base_url + sub_path + article_name + '_print.html'
        # Single-argument call form: valid and identical on Python 2 and 3,
        # unlike the former ``print '...'`` statement.
        print('this is the url: ' + print_url)
        return print_url