From fcc94297274b38b16b73ca7da9fdd8a945cfac2f Mon Sep 17 00:00:00 2001
From: unkn0w7n <51942695+unkn0w7n@users.noreply.github.com>
Date: Sat, 23 Dec 2023 18:05:55 +0530
Subject: [PATCH] Update il_post.recipe

---
 recipes/il_post.recipe | 69 +++++++++++++++---------------------------
 1 file changed, 25 insertions(+), 44 deletions(-)

diff --git a/recipes/il_post.recipe b/recipes/il_post.recipe
index c7b3c063e8..0c163e0e99 100644
--- a/recipes/il_post.recipe
+++ b/recipes/il_post.recipe
@@ -11,13 +11,16 @@
 from __future__ import absolute_import, division, print_function, unicode_literals
 from calibre.web.feeds.news import BasicNewsRecipe
 from calibre.utils.magick import Image
+from datetime import date, timedelta
+
+dates = [ date.today().strftime('%Y/%m/%d'), (date.today() - timedelta(1)).strftime('%Y/%m/%d') ]
 
 # ----------- CUSTOMIZATION OPTIONS START -----------
 
 # Comment (add # in front) to disable the sections you are not interested in
 # Commenta (aggiungi # davanti alla riga) per disabilitare le sezioni che non vuoi scaricare
 sections = [
-    ("Prima Pagina", "https://www.ilpost.it/prime-pagine"),
+    ("Italia", "https://www.ilpost.it/italia/"),
     ("Mondo", "https://www.ilpost.it/mondo/"),
     ("Politica", "https://www.ilpost.it/politica/"),
     ("Tecnologia", "https://www.ilpost.it/tecnologia/"),
@@ -33,16 +36,11 @@ sections = [
     ("Konrad", "https://www.ilpost.it/europa/"),
 ]
 
-# Change this to True if you want grey images
-convert_to_grayscale = False
-
 # ----------- CUSTOMIZATION OPTIONS OVER -----------
 
-prefixes = {"Permalink to", "Commenta", "Link all'articolo"}
-
 
 class IlPost(BasicNewsRecipe):
-    __author__ = 'Marco Scirea'
+    __author__ = 'Marco Scirea, unkn0wn'
     __license__ = 'GPL v3'
     __copyright__ = '2019, Marco Scirea <marco.prolog at gmail.com>'
 
@@ -54,59 +52,42 @@ class IlPost(BasicNewsRecipe):
         ' la ricetta puo\' essere configurata per tenerle a colori'
     )
     tags = "news"
-    cover_url = "https://www.ilpost.it/wp-content/themes/ilpost/images/ilpost.svg"
+    masthead_url = 'https://www.ilpost.it/error/images/ilpost.svg'
     ignore_duplicate_articles = {"title", "url"}
     no_stylesheets = True
-    keep_only_tags = [dict(id=["expanding", "singleBody"])]
+    extra_css = ' .wp-caption-text { font-size:small; } '
+    keep_only_tags = [dict(name='main', attrs={'id':lambda x: x and x.startswith('index_main-content__')})]
+    remove_tags_before = [dict(name='article')]
+    remove_tags_after = [dict(name='article')]
+    remove_tags = [
+        dict(attrs={'class':lambda x: x and x.startswith(
+            ('index_actions__', 'index_il-post-comments___', 'index_art_tag__')
+        )}),
+        dict(attrs={'id':'audioPlayerArticle'})
+    ]
 
     def parse_page(self, name, url):
         self.log.debug(url)
         soup = self.index_to_soup(url)
         entries = []
         for article in soup.findAll('article'):
-            for link in article.findAll('a', href=True, title=True):
-                if not link["href"].startswith("https://www.ilpost.it/20"):
+            for link in article.findAll('a', href=True):
+                if not any(x in link['href'] for x in dates):  # URL must contain today's or yesterday's YYYY/MM/DD
                     continue
-                title = link["title"]
-                for prefix in prefixes:
-                    if title.startswith(prefix):
-                        title = title.lstrip(prefix)
-                        break
-                title = title.strip()
-                entries.append({
+                title = self.tag_to_string(link.h2)
+                desc = self.tag_to_string(link.p)
+                if not title:  # no headline text came out of the link's <h2>
+                    continue
+                self.log('\t', title)
+                entries.append({
                     "url": link["href"],
                     "title": title,
+                    "description": desc,
                 })
         return (name, entries)
 
-    def populate_article_metadata(self, article, soup, first):
-        description = soup.find(attrs={"name": "description"})
-        article.summary = description[
-            "content"] if description else "No meta description given"
-        article.text_summary = description[
-            "content"] if description else "No meta description given"
-
     def parse_index(self):
         feeds = []
         for section in sections:
             feeds.append(self.parse_page(section[0], section[1]))
         return feeds
-
-    if convert_to_grayscale:
-        # Image conversion to greyscale by Starson17
-        # https://www.mobileread.com/forums/showpost.php?p=1814815&postcount=15
-        def postprocess_html(self, soup, first):
-            # process all the images
-            for tag in soup.findAll('img', src=True):
-                iurl = tag['src']
-                img = Image()
-                img.open(iurl)
-                img.type = "GrayscaleType"
-                img.save(iurl)
-            return soup
-
-    def preprocess_html(self, soup):
-        galleryItems = soup.findAll("figure", {"class": "gallery-item"})
-        if galleryItems:
-            self.abort_article()
-        return soup