Update Il Post

2025-07-09 03:04:10 -04:00 · 2019-01-02 10:41:31 +05:30 · 2019-01-02 10:41:31 +05:30 · f19fda0857
commit f19fda0857
parent dde3ae0416
2 changed files with 88 additions and 19 deletions
--- a/recipes/icons/il_post.png
+++ b/recipes/icons/il_post.png
--- a/recipes/il_post.recipe
+++ b/recipes/il_post.recipe
@@ -1,35 +1,104 @@
 #!/usr/bin/env python2
+##
+# Title:        Il Post recipe for calibre
+# Author:       Marco Scirea, based on a recipe by frafra
+# Contact:      marco.prolog at gmail.com
+##
+# License:      GNU General Public License v3 - http://www.gnu.org/copyleft/gpl.html
+# Copyright:    Copyright 2019 Marco Scirea
+##

 from __future__ import absolute_import, division, print_function, unicode_literals
 from calibre.web.feeds.news import BasicNewsRecipe
+from calibre.utils.magick import Image

-prefixes = {"Permalink to", "Commenta"}
+# ----------- CUSTOMIZATION OPTIONS START -----------
+
+# Sections to download, as (feed title, section index URL) pairs.
+# Comment out (add # in front of) the sections you are not interested in.
+sections = [
+    ("Prima Pagina", "https://www.ilpost.it/"),
+    ("Mondo", "https://www.ilpost.it/mondo/"),
+    ("Politica", "https://www.ilpost.it/politica/"),
+    ("Tecnologia", "https://www.ilpost.it/tecnologia/"),
+    ("Internet", "https://www.ilpost.it/internet/"),
+    ("Scienza", "https://www.ilpost.it/scienza/"),
+    ("Cultura", "https://www.ilpost.it/cultura/"),
+    ("Economia", "https://www.ilpost.it/economia/"),
+    ("Sport", "https://www.ilpost.it/sport/"),
+    ("Media", "https://www.ilpost.it/media/"),
+    ("Moda", "https://www.ilpost.it/moda/"),
+    ("Libri", "https://www.ilpost.it/libri/"),
+    ("Auto", "https://www.ilpost.it/auto/"),
+    ("Konrad", "https://www.ilpost.it/europa/"),
+]
+
+# Change this to True if you want grey images
+convert_to_grayscale = False
+
+# ----------- CUSTOMIZATION OPTIONS OVER -----------
+
+# Leading boilerplate that parse_page removes from link titles
+prefixes = {"Permalink to", "Commenta", "Link all'articolo"}


 class IlPost(BasicNewsRecipe):
+    __author__ = 'Marco Scirea'
+    __license__ = 'GPL v3'
+    __copyright__ = '2019, Marco Scirea <marco.prolog at gmail.com>'
+
    title = "Il Post"
    language = "it"
-    __author__ = 'frafra'
+    # User-facing blurb; it must agree with the convert_to_grayscale default
+    # (False, i.e. images stay in colour unless the user opts in).
+    description = ('Puoi decidere quali sezioni scaricare modificando la ricetta.'
+            ' Di default le immagini sono a colori,'
+            ' la ricetta puo\' essere configurata per convertirle in scala di grigio'
+            ' e risparmiare spazio')
    tags = "news"
    cover_url = "https://www.ilpost.it/wp-content/themes/ilpost/images/ilpost.svg"
-    ignore_duplicate_articles = {"url"}
+    ignore_duplicate_articles = {"title", "url"}
    no_stylesheets = True
    keep_only_tags = [dict(id=["expanding", "singleBody"])]

-    def parse_index(self):
-        soup = self.index_to_soup("https://www.ilpost.it/")
+    def parse_page(self, name, url):
+        """Build one feed from a section index page.
+
+        Fetches ``url`` and looks inside every ``<article>`` element for
+        links that carry a ``title`` attribute and whose ``href`` starts with
+        ``https://www.ilpost.it/20``. Any leading entry of ``prefixes`` is
+        removed from the link title.
+
+        Returns the tuple ``(name, entries)``, where ``entries`` is a list of
+        dicts with ``"url"`` and ``"title"`` keys.
+        """
+        self.log.debug(url)
+        soup = self.index_to_soup(url)
        entries = []
-        for link in soup.findAll('a', href=True, title=True):
-            if not link["href"].startswith("https://www.ilpost.it/20"):
-                continue
-            title = link["title"]
-            for prefix in prefixes:
-                if title.startswith(prefix):
-                    title = title.lstrip(prefix)
-                    break
-            title = title.strip()
-            entries.append({
-                "url": link["href"],
-                "title": title,
-            })
-        return [("Il Post", entries)]
+        for article in soup.findAll('article'):
+            for link in article.findAll('a', href=True, title=True):
+                if not link["href"].startswith("https://www.ilpost.it/20"):
+                    continue
+                title = link["title"]
+                for prefix in prefixes:
+                    if title.startswith(prefix):
+                        # Slice the prefix off. str.lstrip() treats its
+                        # argument as a set of characters and would also eat
+                        # the beginning of the headline itself.
+                        title = title[len(prefix):]
+                        break
+                title = title.strip()
+                entries.append({
+                    "url": link["href"],
+                    "title": title,
+                })
+        return (name, entries)
+
+    def populate_article_metadata(self, article, soup, first):
+        """Fill the article's summary fields from the page's meta description.
+
+        Uses the ``content`` attribute of the first element in ``soup`` whose
+        ``name`` is ``description``; when there is no such element a fixed
+        placeholder text is used instead.
+        """
+        meta = soup.find(attrs={"name": "description"})
+        if meta:
+            summary = meta["content"]
+        else:
+            summary = "No meta description given"
+        article.summary = summary
+        article.text_summary = summary
+
+    def parse_index(self):
+        """Return one ``(name, entries)`` feed per entry of ``sections``, in order."""
+        return [
+            self.parse_page(section[0], section[1])
+            for section in sections
+        ]
+
+    # The hook below is only defined when the module-level flag is enabled;
+    # with the flag off, this class body adds no postprocess_html of its own.
+    if convert_to_grayscale:
+        # Image conversion to greyscale by Starson17
+        # https://www.mobileread.com/forums/showpost.php?p=1814815&postcount=15
+        def postprocess_html(self, soup, first):
+            """Convert every image referenced by ``soup`` to grayscale.
+
+            Each image is opened from its ``src`` value and saved back to
+            that same location. Returns ``soup`` itself, unmodified.
+            """
+            # process all the images
+            for tag in soup.findAll('img', src=True):
+                # NOTE(review): presumably src is a local file path at this
+                # stage rather than a remote URL; confirm
+                iurl = tag['src']
+                img = Image()
+                img.open(iurl)
+                img.type = "GrayscaleType"
+                # Overwrite the original file with the converted image
+                img.save(iurl)
+            return soup