#!/usr/bin/env python2
##
# Title:        Il Post recipe for calibre
# Author:       Marco Scirea, based on a recipe by frafra
# Contact:      marco.prolog at gmail.com
##
# License:      GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html
# Copyright:    Copyright 2019 Marco Scirea
##

from __future__ import absolute_import, division, print_function, unicode_literals

from calibre.web.feeds.news import BasicNewsRecipe
from calibre.utils.magick import Image

# ----------- CUSTOMIZATION OPTIONS START -----------

# Each entry is (section name shown in the e-book, URL of the section page).
# Comment out a line (add # in front of it) to skip a section you are not
# interested in downloading.
sections = [
    ("Prima Pagina", "https://www.ilpost.it/"),
    ("Mondo", "https://www.ilpost.it/mondo/"),
    ("Politica", "https://www.ilpost.it/politica/"),
    ("Tecnologia", "https://www.ilpost.it/tecnologia/"),
    # NOTE: the comma after this entry matters; without it Python tries to
    # *call* the tuple with the next one and the recipe fails to load.
    ("Internet", "https://www.ilpost.it/internet/"),
    ("Scienza", "https://www.ilpost.it/scienza/"),
    ("Cultura", "https://www.ilpost.it/cultura/"),
    ("Economia", "https://www.ilpost.it/economia/"),
    ("Sport", "https://www.ilpost.it/sport/"),
    ("Media", "https://www.ilpost.it/media/"),
    ("Moda", "https://www.ilpost.it/moda/"),
    ("Libri", "https://www.ilpost.it/libri/"),
    ("Auto", "https://www.ilpost.it/auto/"),
    ("Konrad", "https://www.ilpost.it/europa/"),
]

# Change this to True if you want grey images
convert_to_grayscale = False

# ----------- CUSTOMIZATION OPTIONS OVER -----------

# Boilerplate that the site puts in front of the real headline inside the
# ``title`` attribute of article links.
prefixes = {"Permalink to", "Commenta", "Link all'articolo"}

# Only links whose URL starts with this are collected as articles.
# Presumably this matches the dated permalinks (https://www.ilpost.it/20YY/...).
_ARTICLE_URL_PREFIX = "https://www.ilpost.it/20"

# Summary used when a page carries no usable <meta name="description">.
_NO_DESCRIPTION = "No meta description given"


def _strip_known_prefix(title):
    """Return *title* without a leading boilerplate prefix and outer whitespace.

    At most one entry of ``prefixes`` is removed.  The prefix is cut off by
    length; ``str.lstrip(prefix)`` must not be used here because it removes
    any leading run of the *characters* in ``prefix`` and therefore also
    swallows the first letters of the actual headline.
    """
    for prefix in prefixes:
        if title.startswith(prefix):
            title = title[len(prefix):]
            break
    return title.strip()


class IlPost(BasicNewsRecipe):
    """calibre recipe that builds one feed per configured Il Post section."""

    __author__ = 'Marco Scirea'
    __license__ = 'GPL v3'
    __copyright__ = '2019, Marco Scirea '

    title = "Il Post"
    language = "it"
    # Kept in sync with the default of ``convert_to_grayscale`` (False):
    # images stay in colour unless the user opts in to grayscale.
    description = ('Puoi decidere quali sezioni scaricare modificando la ricetta.'
                   ' Di default le immagini sono a colori,'
                   ' la ricetta puo\' essere configurata per convertirle'
                   ' in scala di grigio e risparmiare spazio')
    tags = "news"
    cover_url = "https://www.ilpost.it/wp-content/themes/ilpost/images/ilpost.svg"
    ignore_duplicate_articles = {"title", "url"}
    no_stylesheets = True
    keep_only_tags = [dict(id=["expanding", "singleBody"])]

    def parse_page(self, name, url):
        """Collect the article links found on one section page.

        :param name: section title to use for the resulting feed.
        :param url: address of the section index page to scan.
        :return: ``(name, entries)`` where ``entries`` is a list of
                 ``{"url": ..., "title": ...}`` dicts, one per matching link
                 inside an ``<article>`` element.
        """
        self.log.debug(url)
        soup = self.index_to_soup(url)
        entries = []
        for article in soup.findAll('article'):
            for link in article.findAll('a', href=True, title=True):
                href = link["href"]
                if not href.startswith(_ARTICLE_URL_PREFIX):
                    continue
                entries.append({
                    "url": href,
                    "title": _strip_known_prefix(link["title"]),
                })
        return (name, entries)

    def populate_article_metadata(self, article, soup, first):
        """Set the article summary from the page's meta description.

        Falls back to a fixed placeholder when the page has no
        ``<meta name="description">`` tag or the tag has no (or an empty)
        ``content`` attribute.
        """
        meta = soup.find(attrs={"name": "description"})
        summary = meta.get("content") if meta is not None else None
        if not summary:
            summary = _NO_DESCRIPTION
        article.summary = summary
        article.text_summary = summary

    def parse_index(self):
        """Return one ``(section name, entries)`` feed per entry of ``sections``."""
        return [self.parse_page(name, url) for name, url in sections]

    # The override is only defined when the user opted in, so the base class
    # behaviour is untouched in the default (colour) configuration.
    if convert_to_grayscale:
        # Image conversion to greyscale by Starson17
        # https://www.mobileread.com/forums/showpost.php?p=1814815&postcount=15
        def postprocess_html(self, soup, first):
            """Rewrite every image referenced by *soup* as grayscale, in place."""
            for tag in soup.findAll('img', src=True):
                iurl = tag['src']
                img = Image()
                img.open(iurl)
                img.type = "GrayscaleType"
                # Saved back to the same location the <img> tag points to.
                img.save(iurl)
            return soup