calibre/recipes/ilsole24ore.recipe
2019-10-13 09:49:27 +05:30

79 lines
2.9 KiB
Plaintext

# Module-level recipe metadata: author and copyright strings.
__author__ = 'Marco Saraceno'
__copyright__ = '2010, Marco Saraceno <marcosaraceno at gmail.com>'
# Module-level description carrying the recipe version/date; the recipe class
# in this file defines its own separate `description` attribute.
description = 'Italian daily newspaper - v 1.1 (Mar14,2011)'
# Bare string expression holding the site URL. It follows other statements,
# so it is not the module docstring and has no runtime effect.
'''
http://www.ilsole24ore.com
'''
from calibre.web.feeds.news import BasicNewsRecipe
def classes(classes):
    """Build a tag-matching spec keyed on CSS class names.

    ``classes`` is a space-separated string of class names. The result is a
    dict of the form ``{'attrs': {'class': predicate}}`` where ``predicate``
    takes a tag's ``class`` attribute value and returns:

    * the value itself when it is falsy (``None`` or ``''``), or
    * the frozenset of names shared between the value and ``classes``
      (empty, hence falsy, when nothing matches).
    """
    wanted = frozenset(classes.split(' '))

    def shares_a_class(x):
        # Falsy attribute values are handed back untouched.
        if not x:
            return x
        return frozenset(x.split()).intersection(wanted)

    return {'attrs': {'class': shares_a_class}}
class IlSole24Ore(BasicNewsRecipe):
    """Calibre news recipe for the Italian daily Il Sole 24 Ore.

    Downloads the RSS feeds listed in ``feeds``, keeps only the page elements
    whose CSS class is ``ahead`` or ``atext`` and drops elements with class
    ``ahead-ltool``.
    """

    __author__ = 'Marco Saraceno'
    description = 'Italian financial daily newspaper'
    cover_url = 'http://www.shopping24.ilsole24ore.com/ProductRelated/rds/img/logo_sole.gif'
    title = u'Il Sole 24 Ore'
    publisher = 'Gruppo editoriale GRUPPO 24ORE'
    category = 'News, politics, culture, economy, financial, Italian'

    language = 'it'
    timefmt = '[%a, %d %b, %Y]'

    oldest_article = 2
    max_articles_per_feed = 100
    use_embedded_content = False

    extra_css = '.headline {font-size: x-large;} \n .fact { padding-top: 10pt }'
    no_stylesheets = True

    keep_only_tags = [
        classes('ahead atext')
    ]
    remove_tags = [
        classes('ahead-ltool')
    ]

    # Ordered (escape code, replacement) pairs used to decode article URLs that
    # are embedded in ".../<encoded-url>/story01.htm" links (presumably a feed
    # proxy encoding -- TODO confirm). The order matters: the replacements are
    # applied sequentially, so '0A' -> '0' must stay last.
    _ESCAPED_URL_CODES = (
        ('0B', '.'),
        ('0C', '/'),
        ('0D', '?'),
        ('0E', '-'),
        ('0F', '='),
        ('0G', '&'),
        ('0N', '.com'),
        ('0L0S', 'www.'),
        ('0A', '0'),
    )

    def get_article_url(self, article):
        """Return the URL of ``article``, or ``None`` if it has no link.

        ``article`` is a mapping-like feed entry; only its ``'link'`` key is
        read. When the link's last path segment is ``story01.htm``, the real
        URL is taken from the preceding path segment, decoded with
        ``_ESCAPED_URL_CODES`` and prefixed with ``http://``. Any other link
        is returned unchanged.
        """
        link = article.get('link', None)
        if link is None:
            # Report "no URL" instead of handing the entry object back as if
            # it were a URL.
            return None

        segments = link.split('/')
        # The length guard avoids an IndexError for a bare "story01.htm" link.
        if len(segments) > 1 and segments[-1] == "story01.htm":
            link = segments[-2]
            for code, plain in self._ESCAPED_URL_CODES:
                link = link.replace(code, plain)
            link = "http://" + link
        return link

    feeds = [
        ('Italia', 'https://www.ilsole24ore.com/rss/italia.xml'),
        ('Mondo', 'https://www.ilsole24ore.com/rss/mondo.xml'),
        ('Economia', 'https://www.ilsole24ore.com/rss/economia.xml'),
        # NOTE(review): the title reads 'Finanzia' while the feed is
        # finanza.xml -- possibly a typo; left unchanged, confirm with owner.
        ('Finanzia', 'https://www.ilsole24ore.com/rss/finanza.xml'),
        ('Commenti', 'https://www.ilsole24ore.com/rss/commenti.xml'),
        ('Risparmio', 'https://www.ilsole24ore.com/rss/risparmio.xml'),
        ('Norme e Tributi', 'https://www.ilsole24ore.com/rss/norme-e-tributi.xml'),
        ('Management', 'https://www.ilsole24ore.com/rss/management.xml'),
        ('Cultura', 'https://www.ilsole24ore.com/rss/cultura.xml'),
        ('Tecnologia', 'https://www.ilsole24ore.com/rss/tecnologia.xml'),
        ('Food', 'https://www.ilsole24ore.com/rss/food.xml'),
        ('Moda', 'https://www.ilsole24ore.com/rss/moda.xml'),
        ('Motori', 'https://www.ilsole24ore.com/rss/motori.xml'),
        ('Casa', 'https://www.ilsole24ore.com/rss/casa.xml'),
        ('Viaggi', 'https://www.ilsole24ore.com/rss/viaggi.xml'),
        ('Salute', 'https://www.ilsole24ore.com/rss/salute.xml'),
        ('Arteconomy', 'https://www.ilsole24ore.com/rss/arteconomy.xml'),
        ('Sport', 'https://www.ilsole24ore.com/rss/sport24.xml'),
    ]

    def print_version(self, url):
        """Return ``url`` with every ``.shtml`` replaced by ``_PRN.shtml``.

        URLs that do not contain ``.shtml`` are returned unchanged.
        """
        return url.replace('.shtml', '_PRN.shtml')