mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-09-29 15:31:08 -04:00
80 lines
3.3 KiB
Python
80 lines
3.3 KiB
Python
#!/usr/bin/env python
|
|
__license__ = 'GPL v3'
|
|
__author__ = 'Lorenzo Vigentini'
|
|
__copyright__ = '2009, Lorenzo Vigentini <l.vigentini at gmail.com>'
|
|
__version__ = 'v1.02'
|
|
__date__ = '10, January 2010'
|
|
__description__ = 'Sport news from the most read sport newspaper in Italy'
|
|
|
|
'''www.gazzetta.it'''
|
|
|
|
from calibre.web.feeds.news import BasicNewsRecipe
|
|
|
|
class laGazzetta(BasicNewsRecipe):
    """Recipe that downloads sport news from www.gazzetta.it.

    Articles are discovered through the per-sport RSS feeds listed in
    ``feeds`` and fetched via the URL computed by ``print_version``.
    """

    __author__ = 'Lorenzo Vigentini'
    description = 'Sport news from the most read sport newspaper in Italy'

    cover_url = 'http://www.gazzetta.it/primapagina/images/prima_pagina_grande.png'
    title = 'La Gazzetta dello Sport '
    publisher = 'RCS Digital'
    category = 'Sport News'

    language = 'it'
    encoding = 'cp1252'
    timefmt = '[%a, %d %b, %Y]'

    oldest_article = 2
    max_articles_per_feed = 20
    use_embedded_content = False
    # NOTE(review): the attribute documented on BasicNewsRecipe appears to be
    # ``recursions``; confirm whether this name is intentional before renaming,
    # because renaming would change how deep links are followed.
    recursion = 10

    remove_javascript = True
    no_stylesheets = True

    # Only the element holding the article body is kept ...
    keep_only_tags = [dict(name='div', attrs={'id': 'articolo'})]

    # ... and these elements are dropped from the downloaded page.
    remove_tags = [
        dict(name='ul', attrs={'id': ['service-toolbar', 'sections-menu']}),
        dict(name='div', attrs={'id': ['header', 'rightcol', 'sponsored', 'vxFlashPlayer', 'footer', 'print-box']}),
        dict(name='iframe', attrs={'id': 'mirago-feed'}),
        dict(name='a', attrs={'id': 'commenta-up'}),
        dict(name='cite', attrs={'class': ['signature', 'parag-title']}),
        dict(name='a', attrs={'class': ['last-comment', 'button-bold2']}),
        dict(name=['base', 'object', 'link', 'a', 'script', 'noscript'])
    ]

    extra_css = '''
        h1 {font: sans-serif large;}
        h2 {font: sans-serif medium;}
        h3 {font: sans-serif small;}
        h4 {font: sans-serif bold small;}
        p {font:10pt helvetica}
        dd {font:8pt helvetica}
    '''

    # (section title, RSS feed URL) pairs, one per sport.
    feeds = [
        (u'Calcio', u'http://www.gazzetta.it/rss/Calcio.xml'),
        (u'Formula 1', u'http://www.gazzetta.it/rss/Formula1.xml'),
        (u'Motomondiale', u'http://www.gazzetta.it/rss/Motomondiale.xml'),
        (u'Motori', u'http://www.gazzetta.it/rss/Motori.xml'),
        (u'Ciclismo', u'http://www.gazzetta.it/rss/Ciclismo.xml'),
        (u'Basket', u'http://www.gazzetta.it/rss/Basket.xml'),
        (u'Tennis', u'http://www.gazzetta.it/rss/Tennis.xml'),
        (u'Pallavolo', u'http://www.gazzetta.it/rss/Pallavolo.xml'),
        (u'Vela', u'http://www.gazzetta.it/rss/Vela.xml'),
        (u'Atletica', u'http://www.gazzetta.it/rss/Atletica.xml'),
        (u'Altri Sport', u'http://www.gazzetta.it/rss/Sport_Vari.xml')
    ]

    def print_version(self, url):
        """Return the URL of the printer-friendly page for an article.

        The result is ``<scheme>://<host>/`` followed by up to four path
        components of ``url`` and ``<article>_print.html``, where
        ``<article>`` is the last path component with its final six
        characters removed and everything from the first ``.`` onward
        discarded.

        :param url: article URL taken from a feed.
        :return: the print-version URL as a string.
        """
        segments = url.split('/')
        # 'scheme:', '' and 'host' are the first three pieces of the split.
        basename = '/'.join(segments[:3]) + '/'
        # Slicing never raises: shorter URLs simply contribute fewer pieces.
        subPath = '/'.join(segments[3:7]) + '/'
        # Presumably strips a six character suffix such as '.shtml' -- the
        # length is hard-coded, so confirm against real article URLs.
        articleURL = segments[-1][:-6]
        myArticle = articleURL.split('.')[0]
        myURL = basename + subPath + myArticle + '_print.html'
        # Call form of print works on both Python 2 and Python 3; the
        # original statement form is a SyntaxError on Python 3.
        print('this is the url: ' + myURL)
        return myURL
|