mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
113 lines
4.2 KiB
Python
113 lines
4.2 KiB
Python
#!/usr/bin/env python
##
# Title: Il Post recipe for calibre
# Author: Marco Scirea, based on a recipe by frafra
# Contact: marco.prolog at gmail.com
##
# License: GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html
# Copyright: Copyright 2019 Marco Scirea
##

from __future__ import absolute_import, division, print_function, unicode_literals
|
|
from calibre.web.feeds.news import BasicNewsRecipe
|
|
from calibre.utils.magick import Image
|
|
|
|
# ----------- CUSTOMIZATION OPTIONS START -----------

# Each entry is a (feed title, section index URL) pair; the recipe fetches
# them in this order. Comment out (add # in front of) the sections you are
# not interested in.
sections = [
    ("Prima Pagina", "https://www.ilpost.it/prime-pagine"),
    ("Mondo", "https://www.ilpost.it/mondo/"),
    ("Politica", "https://www.ilpost.it/politica/"),
    ("Tecnologia", "https://www.ilpost.it/tecnologia/"),
    ("Internet", "https://www.ilpost.it/internet/"),
    ("Scienza", "https://www.ilpost.it/scienza/"),
    ("Cultura", "https://www.ilpost.it/cultura/"),
    ("Economia", "https://www.ilpost.it/economia/"),
    ("Sport", "https://www.ilpost.it/sport/"),
    ("Media", "https://www.ilpost.it/media/"),
    ("Moda", "https://www.ilpost.it/moda/"),
    ("Libri", "https://www.ilpost.it/libri/"),
    ("Auto", "https://www.ilpost.it/auto/"),
    ("Konrad", "https://www.ilpost.it/europa/"),
]

# Change this to True if you want grey images
convert_to_grayscale = False

# ----------- CUSTOMIZATION OPTIONS OVER -----------

# Boilerplate that can appear at the start of a link's "title" attribute and
# is removed from article titles when a section page is parsed.
prefixes = {"Permalink to", "Commenta", "Link all'articolo"}


class IlPost(BasicNewsRecipe):
    """Calibre recipe that builds one feed per entry of the module-level
    ``sections`` list by scraping the corresponding Il Post index page.

    Relies on the module-level ``sections``, ``prefixes`` and
    ``convert_to_grayscale`` settings defined above the class.
    """

    __author__ = 'Marco Scirea'
    __license__ = 'GPL v3'
    __copyright__ = '2019, Marco Scirea <marco.prolog at gmail.com>'

    title = "Il Post"
    language = "it"
    # NOTE(review): this text says images are converted to grayscale by
    # default, but ``convert_to_grayscale`` defaults to False in this file.
    # The user-facing string is left unchanged here; confirm which is intended.
    description = (
        'Puoi decidere quali sezioni scaricare modificando la ricetta.'
        ' Di default le immagini sono convertite in scala di grigio per risparmiare spazio,'
        ' la ricetta puo\' essere configurata per tenerle a colori'
    )
    tags = "news"
    cover_url = "https://www.ilpost.it/wp-content/themes/ilpost/images/ilpost.svg"
    ignore_duplicate_articles = {"title", "url"}
    no_stylesheets = True
    keep_only_tags = [dict(id=["expanding", "singleBody"])]

    def parse_page(self, name, url):
        """Collect the article links found on one section index page.

        :param name: feed title to return alongside the entries.
        :param url: URL of the section index page to fetch.
        :return: ``(name, entries)`` where ``entries`` is a list of dicts
            with ``"url"`` and ``"title"`` keys.
        """
        self.log.debug(url)
        soup = self.index_to_soup(url)
        entries = []
        for article in soup.findAll('article'):
            for link in article.findAll('a', href=True, title=True):
                # Only keep links whose URL starts with the site root
                # followed by "20" (e.g. a 20xx path component).
                if not link["href"].startswith("https://www.ilpost.it/20"):
                    continue
                title = link["title"]
                for prefix in prefixes:
                    if title.startswith(prefix):
                        # Slice the prefix off. str.lstrip(prefix) would
                        # treat ``prefix`` as a set of characters and could
                        # also eat the leading letters of the real title.
                        title = title[len(prefix):]
                        break
                title = title.strip()
                entries.append({
                    "url": link["href"],
                    "title": title,
                })
        return (name, entries)

    def populate_article_metadata(self, article, soup, first):
        """Set the article summaries from the page's ``description`` meta tag.

        Both ``article.summary`` and ``article.text_summary`` receive the
        tag's ``content`` attribute, or a fixed placeholder when the page
        has no element with ``name="description"``.
        """
        description = soup.find(attrs={"name": "description"})
        if description:
            summary = description["content"]
        else:
            summary = "No meta description given"
        article.summary = summary
        article.text_summary = summary

    def parse_index(self):
        """Return the list of ``(name, entries)`` feeds, one per configured
        section, in the order given by ``sections``."""
        return [self.parse_page(section[0], section[1]) for section in sections]

    # The grayscale hook is only defined when the option is enabled, so with
    # the default setting the class does not override postprocess_html at all.
    if convert_to_grayscale:
        # Image conversion to greyscale by Starson17
        # https://www.mobileread.com/forums/showpost.php?p=1814815&postcount=15
        def postprocess_html(self, soup, first):
            """Re-save every ``<img>`` referenced by ``soup`` as grayscale,
            in place, and return ``soup``."""
            # process all the images
            for tag in soup.findAll('img', src=True):
                iurl = tag['src']
                img = Image()
                img.open(iurl)
                img.type = "GrayscaleType"
                img.save(iurl)
            return soup

    def preprocess_html(self, soup):
        """Call ``self.abort_article()`` for pages that contain a
        ``<figure class="gallery-item">`` element; otherwise return ``soup``
        unchanged."""
        galleryItems = soup.findAll("figure", {"class": "gallery-item"})
        if galleryItems:
            # presumably abort_article() raises to skip the download, so the
            # return below is not reached for galleries; confirm against
            # the calibre API.
            self.abort_article()
        return soup