mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Update Il Post
This commit is contained in:
parent
dde3ae0416
commit
f19fda0857
BIN
recipes/icons/il_post.png
Normal file
BIN
recipes/icons/il_post.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 614 B |
@ -1,35 +1,104 @@
|
||||
#!/usr/bin/env python2
|
||||
##
|
||||
# Title: Il Post recipe for calibre
|
||||
# Author: Marco Scirea, based on a recipe by frafra
|
||||
# Contact: marco.prolog at gmail.com
|
||||
##
|
||||
# License: GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html
|
||||
# Copyright: Copyright 2019 Marco Scirea
|
||||
##
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
from calibre.utils.magick import Image
|
||||
|
||||
prefixes = {"Permalink to", "Commenta"}
|
||||
# ----------- CUSTOMIZATION OPTIONS START -----------
|
||||
|
||||
# Comment (add # in front) to disable the sections you are not interested in
|
||||
# Commenta (aggiungi # davanti alla riga) per disabilitare le sezioni che non vuoi scaricare
|
||||
sections = [
|
||||
("Prima Pagina", "https://www.ilpost.it/"),
|
||||
("Mondo", "https://www.ilpost.it/mondo/"),
|
||||
("Politica", "https://www.ilpost.it/politica/"),
|
||||
("Tecnologia", "https://www.ilpost.it/tecnologia/"),
|
||||
("Internet",
|
||||
"https://www.ilpost.it/internet/")("Scienza", "https://www.ilpost.it/scienza/"),
|
||||
("Cultura", "https://www.ilpost.it/cultura/"),
|
||||
("Economia", "https://www.ilpost.it/economia/"),
|
||||
("Sport", "https://www.ilpost.it/sport/"),
|
||||
("Media", "https://www.ilpost.it/media/"),
|
||||
("Moda", "https://www.ilpost.it/moda/"),
|
||||
("Libri", "https://www.ilpost.it/libri/"),
|
||||
("Auto", "https://www.ilpost.it/auto/"),
|
||||
("Konrad", "https://www.ilpost.it/europa/"),
|
||||
]
|
||||
|
||||
# Change this to True if you want grey images
|
||||
convert_to_grayscale = False
|
||||
|
||||
# ----------- CUSTOMIZATION OPTIONS OVER -----------
|
||||
|
||||
prefixes = {"Permalink to", "Commenta", "Link all'articolo"}
|
||||
|
||||
|
||||
class IlPost(BasicNewsRecipe):
|
||||
__author__ = 'Marco Scirea'
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2019, Marco Scirea <marco.prolog at gmail.com>'
|
||||
|
||||
title = "Il Post"
|
||||
language = "it"
|
||||
__author__ = 'frafra'
|
||||
description = ('Puoi decidere quali sezioni scaricare modificando la ricetta.'
|
||||
' Di default le immagini sono convertite in scala di grigio per risparmiare spazio,'
|
||||
' la ricetta puo\' essere configurata per tenerle a colori')
|
||||
tags = "news"
|
||||
cover_url = "https://www.ilpost.it/wp-content/themes/ilpost/images/ilpost.svg"
|
||||
ignore_duplicate_articles = {"url"}
|
||||
ignore_duplicate_articles = {"title", "url"}
|
||||
no_stylesheets = True
|
||||
keep_only_tags = [dict(id=["expanding", "singleBody"])]
|
||||
|
||||
def parse_index(self):
|
||||
soup = self.index_to_soup("https://www.ilpost.it/")
|
||||
def parse_page(self, name, url):
|
||||
self.log.debug(url)
|
||||
soup = self.index_to_soup(url)
|
||||
entries = []
|
||||
for link in soup.findAll('a', href=True, title=True):
|
||||
if not link["href"].startswith("https://www.ilpost.it/20"):
|
||||
continue
|
||||
title = link["title"]
|
||||
for prefix in prefixes:
|
||||
if title.startswith(prefix):
|
||||
title = title.lstrip(prefix)
|
||||
break
|
||||
title = title.strip()
|
||||
entries.append({
|
||||
"url": link["href"],
|
||||
"title": title,
|
||||
})
|
||||
return [("Il Post", entries)]
|
||||
for article in soup.findAll('article'):
|
||||
for link in article.findAll('a', href=True, title=True):
|
||||
if not link["href"].startswith("https://www.ilpost.it/20"):
|
||||
continue
|
||||
title = link["title"]
|
||||
for prefix in prefixes:
|
||||
if title.startswith(prefix):
|
||||
title = title.lstrip(prefix)
|
||||
break
|
||||
title = title.strip()
|
||||
entries.append({
|
||||
"url": link["href"],
|
||||
"title": title,
|
||||
})
|
||||
return (name, entries)
|
||||
|
||||
def populate_article_metadata(self, article, soup, first):
|
||||
description = soup.find(attrs={"name": "description"})
|
||||
article.summary = description[
|
||||
"content"] if description else "No meta description given"
|
||||
article.text_summary = description[
|
||||
"content"] if description else "No meta description given"
|
||||
|
||||
def parse_index(self):
|
||||
feeds = []
|
||||
for section in sections:
|
||||
feeds.append(self.parse_page(section[0], section[1]))
|
||||
return feeds
|
||||
|
||||
if convert_to_grayscale:
|
||||
# Image conversion to greyscale by Starson17
|
||||
# https://www.mobileread.com/forums/showpost.php?p=1814815&postcount=15
|
||||
def postprocess_html(self, soup, first):
|
||||
# process all the images
|
||||
for tag in soup.findAll('img', src=True):
|
||||
iurl = tag['src']
|
||||
img = Image()
|
||||
img.open(iurl)
|
||||
img.type = "GrayscaleType"
|
||||
img.save(iurl)
|
||||
return soup
|
||||
|
Loading…
x
Reference in New Issue
Block a user