Update Il Post

This commit is contained in:
Kovid Goyal 2019-01-02 10:41:31 +05:30
parent dde3ae0416
commit f19fda0857
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C
2 changed files with 88 additions and 19 deletions

BIN
recipes/icons/il_post.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 614 B

View File

@ -1,35 +1,104 @@
#!/usr/bin/env python2 #!/usr/bin/env python2
##
# Title: Il Post recipe for calibre
# Author: Marco Scirea, based on a recipe by frafra
# Contact: marco.prolog at gmail.com
##
# License: GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html
# Copyright: Copyright 2019 Marco Scirea
##
from __future__ import absolute_import, division, print_function, unicode_literals from __future__ import absolute_import, division, print_function, unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
from calibre.utils.magick import Image
prefixes = {"Permalink to", "Commenta"} # ----------- CUSTOMIZATION OPTIONS START -----------
# Comment (add # in front) to disable the sections you are not interested in
# Commenta (aggiungi # davanti alla riga) per disabilitare le sezioni che non vuoi scaricare
# Each entry is a (section name, section page URL) pair; one feed is built
# per entry, in this order.
sections = [
    ("Prima Pagina", "https://www.ilpost.it/"),
    ("Mondo", "https://www.ilpost.it/mondo/"),
    ("Politica", "https://www.ilpost.it/politica/"),
    ("Tecnologia", "https://www.ilpost.it/tecnologia/"),
    # The comma after this entry is required: without it Python tries to
    # call the tuple with the next entry as arguments and raises TypeError.
    ("Internet", "https://www.ilpost.it/internet/"),
    ("Scienza", "https://www.ilpost.it/scienza/"),
    ("Cultura", "https://www.ilpost.it/cultura/"),
    ("Economia", "https://www.ilpost.it/economia/"),
    ("Sport", "https://www.ilpost.it/sport/"),
    ("Media", "https://www.ilpost.it/media/"),
    ("Moda", "https://www.ilpost.it/moda/"),
    ("Libri", "https://www.ilpost.it/libri/"),
    ("Auto", "https://www.ilpost.it/auto/"),
    ("Konrad", "https://www.ilpost.it/europa/"),
]
# Change this to True if you want grey images
convert_to_grayscale = False
# ----------- CUSTOMIZATION OPTIONS END -----------
# Leading boilerplate that is removed from the start of link titles.
prefixes = {"Permalink to", "Commenta", "Link all'articolo"}
class IlPost(BasicNewsRecipe):
    """calibre recipe that builds one feed per configured Il Post section.

    The section list, the title prefixes to remove and the grayscale switch
    are read from the module-level names ``sections``, ``prefixes`` and
    ``convert_to_grayscale``.
    """

    __author__ = 'Marco Scirea'
    __license__ = 'GPL v3'
    __copyright__ = '2019, Marco Scirea <marco.prolog at gmail.com>'
    title = "Il Post"
    language = "it"
    # NOTE(review): this text says images are converted to grayscale by
    # default, but the module-level convert_to_grayscale flag defaults to
    # False. One of the two should be aligned -- confirm the intended default.
    description = ('Puoi decidere quali sezioni scaricare modificando la ricetta.'
                   ' Di default le immagini sono convertite in scala di grigio per risparmiare spazio,'
                   ' la ricetta puo\' essere configurata per tenerle a colori')
    tags = "news"
    cover_url = "https://www.ilpost.it/wp-content/themes/ilpost/images/ilpost.svg"
    ignore_duplicate_articles = {"title", "url"}
    no_stylesheets = True
    keep_only_tags = [dict(id=["expanding", "singleBody"])]

    def parse_page(self, name, url):
        """Collect the article links found on one section page.

        ``name`` is the section label and is returned unchanged; ``url`` is
        the address of the section page to scan.

        Returns a ``(name, entries)`` tuple where ``entries`` is a list of
        dicts with the keys ``"url"`` and ``"title"``.
        """
        self.log.debug(url)
        soup = self.index_to_soup(url)
        entries = []
        for article in soup.findAll('article'):
            for link in article.findAll('a', href=True, title=True):
                # Keep only links under the site root followed by "20"
                # (presumably dated article permalinks -- verify).
                if not link["href"].startswith("https://www.ilpost.it/20"):
                    continue
                title = link["title"]
                for prefix in prefixes:
                    if title.startswith(prefix):
                        # Slice the prefix off. str.lstrip() would treat the
                        # prefix as a set of characters and could also remove
                        # the first letters of the real title.
                        title = title[len(prefix):]
                        break
                entries.append({
                    "url": link["href"],
                    "title": title.strip(),
                })
        return (name, entries)

    def populate_article_metadata(self, article, soup, first):
        """Set the article summaries from the page's meta description.

        Looks up the element whose ``name`` attribute is ``"description"``
        and stores its ``content`` attribute in both ``article.summary`` and
        ``article.text_summary``. A fixed placeholder is used when the
        element or its ``content`` attribute is missing.
        """
        fallback = "No meta description given"
        meta = soup.find(attrs={"name": "description"})
        # .get() avoids a KeyError for a tag that has no content attribute.
        summary = meta.get("content", fallback) if meta is not None else fallback
        article.summary = summary
        article.text_summary = summary

    def parse_index(self):
        """Return one ``(name, entries)`` feed per entry of ``sections``."""
        return [self.parse_page(name, url) for name, url in sections]

    if convert_to_grayscale:
        # Image conversion to greyscale by Starson17
        # https://www.mobileread.com/forums/showpost.php?p=1814815&postcount=15
        # The method is only defined when the module-level flag is set.
        def postprocess_html(self, soup, first):
            """Convert every image referenced by ``soup`` to grayscale.

            Each ``img`` tag with a ``src`` attribute is opened, switched to
            the ``"GrayscaleType"`` image type and saved back to the same
            location. Returns ``soup``.
            """
            for tag in soup.findAll('img', src=True):
                # assumes src is a local, writable path at this stage --
                # TODO confirm
                iurl = tag['src']
                img = Image()
                img.open(iurl)
                img.type = "GrayscaleType"
                img.save(iurl)
            return soup