mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-12-23 21:37:22 -05:00
95 lines
3.5 KiB
Python
95 lines
3.5 KiB
Python
#!/usr/bin/env python
|
|
##
|
|
# Title: Il Post recipe for calibre
|
|
# Author: Marco Scirea, based on a recipe by frafra
|
|
# Contact: marco.prolog at gmail.com
|
|
##
|
|
# License: GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html
|
|
# Copyright: Copyright 2019 Marco Scirea
|
|
##
|
|
|
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
|
|
|
from datetime import date, timedelta
|
|
|
|
from calibre.web.feeds.news import BasicNewsRecipe
|
|
|
|
# Date fragments ('YYYY/MM/DD') for today and yesterday; parse_page keeps only
# links whose href contains one of these strings.
dates = [
    day.strftime('%Y/%m/%d')
    for day in (date.today(), date.today() - timedelta(1))
]
|
|
|
|
# ----------- CUSTOMIZATION OPTIONS START -----------
|
|
|
|
# Comment (add # in front) to disable the sections you are not interested in
|
|
# Commenta (aggiungi # davanti alla riga) per disabilitare le sezioni che non vuoi scaricare
|
|
# Each entry is a (section title, section index URL) pair. parse_index walks
# this list in order and produces one feed per entry, so commenting out a line
# drops that section from the download.
sections = [
    ('Italia', 'https://www.ilpost.it/italia/'),
    ('Mondo', 'https://www.ilpost.it/mondo/'),
    ('Politica', 'https://www.ilpost.it/politica/'),
    ('Tecnologia', 'https://www.ilpost.it/tecnologia/'),
    ('Internet', 'https://www.ilpost.it/internet/'),
    ('Scienza', 'https://www.ilpost.it/scienza/'),
    ('Cultura', 'https://www.ilpost.it/cultura/'),
    ('Economia', 'https://www.ilpost.it/economia/'),
    ('Sport', 'https://www.ilpost.it/sport/'),
    ('Media', 'https://www.ilpost.it/media/'),
    ('Moda', 'https://www.ilpost.it/moda/'),
    ('Libri', 'https://www.ilpost.it/libri/'),
    ('Auto', 'https://www.ilpost.it/auto/'),
    ('Konrad', 'https://www.ilpost.it/europa/'),
]
|
|
|
|
# ----------- CUSTOMIZATION OPTIONS END -----------
|
|
|
|
|
|
class IlPost(BasicNewsRecipe):
    """calibre recipe that builds one feed per configured Il Post section.

    Every index page listed in the module-level ``sections`` is downloaded and
    scanned for article links; a link is kept only when its href contains one
    of the date fragments in the module-level ``dates`` list.
    """

    __author__ = 'Marco Scirea, unkn0wn'
    __license__ = 'GPL v3'
    __copyright__ = '2019, Marco Scirea <marco.prolog at gmail.com>'

    title = 'Il Post'
    language = 'it'
    # NOTE(review): this text mentions greyscale image conversion, but nothing
    # in this class configures it -- confirm the description is still accurate.
    description = (
        'Puoi decidere quali sezioni scaricare modificando la ricetta.'
        ' Di default le immagini sono convertite in scala di grigio per risparmiare spazio,'
        " la ricetta puo' essere configurata per tenerle a colori"
    )
    tags = 'news'
    masthead_url = 'https://www.ilpost.it/error/images/ilpost.svg'
    ignore_duplicate_articles = {'title', 'url'}
    no_stylesheets = True
    extra_css = ' .wp-caption-text { font-size:small; } '

    # Article clean-up rules. The id/class matchers are prefix tests: the
    # lambdas guard against a missing attribute (``x and ...``) before calling
    # ``startswith``.
    keep_only_tags = [dict(name='main', attrs={'id': lambda x: x and x.startswith('index_main-content__')})]
    remove_tags_before = [dict(name='article')]
    remove_tags_after = [dict(name='article')]
    remove_tags = [
        dict(attrs={'class': lambda x: x and x.startswith(
            ('index_actions__', 'index_il-post-comments___', 'index_art_tag__')
        )}),
        dict(attrs={'id': 'audioPlayerArticle'})
    ]

    def parse_page(self, name, url):
        """Collect the article links found on one section index page.

        :param name: section title; returned unchanged as the feed title.
        :param url: URL of the section index page to download.
        :return: ``(name, entries)`` where ``entries`` is a list of dicts with
            the keys ``url``, ``title`` and ``description``.
        """
        self.log.debug(url)
        soup = self.index_to_soup(url)
        entries = []
        for article in soup.findAll('article'):
            for link in article.findAll('a', href=True):
                href = link['href']
                # Keep only links whose href contains one of the date
                # fragments in the module-level ``dates`` list.
                if not any(fragment in href for fragment in dates):
                    continue
                title = self.tag_to_string(link.h2)
                if not title:
                    # No <h2> text inside the link: nothing usable as a title.
                    continue
                self.log('\t', title)
                entries.append({
                    'url': href,
                    'title': title,
                    'description': self.tag_to_string(link.p),
                })
        return name, entries

    def parse_index(self):
        """Return one ``(section title, entries)`` pair per item in ``sections``."""
        return [
            self.parse_page(section_name, section_url)
            for section_name, section_url in sections
        ]
|