From f64dfddd402588c65e6a0bd39f7d6d915a36b844 Mon Sep 17 00:00:00 2001
From: Kovid Goyal <kovid@kovidgoyal.net>
Date: Wed, 20 Jan 2021 09:20:42 +0530
Subject: [PATCH] Update Jacobin

---
 recipes/jacobinmag.recipe | 99 ++++++++++++++++++---------------------
 1 file changed, 45 insertions(+), 54 deletions(-)

diff --git a/recipes/jacobinmag.recipe b/recipes/jacobinmag.recipe
index 285fddbc91..5804d55fec 100644
--- a/recipes/jacobinmag.recipe
+++ b/recipes/jacobinmag.recipe
@@ -11,6 +11,12 @@ www.jacobinmag.com
 from calibre.web.feeds.news import BasicNewsRecipe
 
 
+def classes(classes):  # classes: space-separated CSS class names to match
+    q = frozenset(classes.split(' '))  # the set of wanted class names
+    return dict(attrs={  # tag-matching spec; used as a keep_only_tags entry below
+        'class': lambda x: x and frozenset(x.split()).intersection(q)})  # truthy when the class attribute shares a name with q
+
+
 class Jacobinmag(BasicNewsRecipe):
     title = 'Jacobin'
     __author__ = 'Darko Miletic'
@@ -29,12 +35,11 @@ class Jacobinmag(BasicNewsRecipe):
     issue_url = None
     PREFIX = 'https://www.jacobinmag.com'
     LOGIN = 'https://auth.jacobinmag.com/mini_profile?redirect=https%3A%2F%2Fwww.jacobinmag.com%2F'
-    masthead_url = 'https://www.jacobinmag.com/wp-content/themes/boukman/images/banner/type.svg'
     extra_css = """
-                               body{font-family: Antwerp, 'Times New Roman', Times, serif}
-                               img{margin-top:1em; margin-bottom: 1em; display:block}
-                               .entry-dek,.entry-author{font-family: Hurme-No3, Futura, sans-serif}
-                           """
+        body{font-family: Antwerp, 'Times New Roman', Times, serif}
+        img{margin-top:1em; margin-bottom: 1em; display:block}
+        .entry-dek,.entry-author{font-family: Hurme-No3, Futura, sans-serif}
+    """
 
     conversion_options = {
         'comment': description,
@@ -44,56 +49,70 @@ class Jacobinmag(BasicNewsRecipe):
     }
 
     remove_tags = [
-        dict(name=['meta', 'link']),
-        dict(name='div', attrs={'class': 'entry-bottom'}),
-        dict(name='div', attrs={'data-app': 'share_buttons'}),
+        dict(id=['post-header-share', 'post-print']),
+        dict(name='form'),
     ]
 
-    keep_only_tags = [dict(attrs={'class': ['entry-header', 'entry-content']})]
+    keep_only_tags = [
+        classes('po__article')
+    ]
 
     def parse_index(self):
+        """Return the articles of the latest issue as a single feed.
+
+        Looks up the "View Issue" link on the store's issue list, loads that
+        issue page, records its cover image when one is present and collects
+        every article that has a linked heading. Returns a list holding one
+        (feed title, article dicts) tuple, or an empty list when no articles
+        were found.
+
+        Raises ValueError if the "View Issue" link cannot be located.
+        """
         ans = []
         articles = []
-        lurl = self.get_issue()
-        if lurl:
-            soup = self.index_to_soup(lurl)
+        soup = self.index_to_soup('https://www.jacobinmag.com/store/issues')
+        link = soup.find('a', text='View Issue')
+        if link is None:
+            # Give a readable error rather than a TypeError on None['href']
+            raise ValueError('Could not find the "View Issue" link on the issues page')
+        lurl = 'https://jacobinmag.com' + link['href']
+        feedtitle = 'Articles'
+        self.log('Loading issue from', lurl)
+        soup = self.index_to_soup(lurl)
 
-            # Find cover url
-            myimg = soup.find('img', attrs={'id': 'front-cover'})
-            if myimg:
-                self.cover_url = self.image_url_processor(None, myimg['src'])
-            # End find cover url
+        # Find cover url
+        # The cover is optional: carry on without one if the markup is missing
+        di = soup.find('figure', attrs={'class': lambda x: x and '__cover' in x})
+        img = di.find('img') if di is not None else None
+        if img is not None and img.get('src'):
+            self.cover_url = img['src']
+        # End find cover url
 
-            # Configure series
-            self.conversion_options.update({'series': 'Jacobin'})
+        # Get series title
+        title = soup.find('h1', attrs={'class': lambda x: x and '__heading' in x})
+        if title is not None:
+            # Keep the 'Articles' default when the heading has no text
+            feedtitle = self.tag_to_string(title) or feedtitle
 
-            # Get series title
-            feedtitle = 'Articles'
-            title = soup.find('div', attrs={'id': 'iss-title-name'})
-            if title:
-                feedtitle = self.tag_to_string(title)
-
-            # Scrape article links
-            for section in soup.findAll('div', attrs={'class': 'section-articles'}):
-                for art in section.findAll('article'):
-                    urlbase = art.find('h3', attrs={'class': 'iss-hed'})
-                    if urlbase and urlbase.a[
-                        'href'
-                    ] != 'https://www.jacobinmag.com/subscribe/':
-                        url = urlbase.a['href']
-                        title = self.tag_to_string(urlbase)
-                        desc = ''
-                        descbase = urlbase = art.find(
-                            'p', attrs={'class': 'iss-dek'}
-                        )
-                        if descbase:
-                            desc = self.tag_to_string(descbase)
-                        articles.append({
-                            'title': title,
-                            'url': url,
-                            'description': desc
-                        })
-        ans.append((feedtitle, articles))
+        # Scrape article links
+        for section in soup.findAll('div', attrs={'class': lambda x: x and '__content' in x}):
+            for art in section.findAll('article'):
+                h1 = art.find('h1')
+                a = h1.find('a') if h1 is not None else None
+                if a is None or not a.get('href'):
+                    # Skip entries without a linked heading instead of crashing
+                    continue
+                title = self.tag_to_string(a)
+                url = 'https://jacobinmag.com' + a['href']
+                desc = ''
+                p = art.find('p')
+                if p:
+                    desc = self.tag_to_string(p)
+                articles.append({'title': title, 'url': url, 'description': desc})
+                self.log(title, 'at', url)
+                if desc:
+                    self.log('\t', desc)
+        if articles:
+            ans.append((feedtitle, articles))
         return ans
 
     def get_browser(self):
@@ -111,11 +130,3 @@ class Jacobinmag(BasicNewsRecipe):
             if div:
                 br.open(div['data-redirect'])
         return br
-
-    def get_issue(self):
-        issue = None
-        soup = self.index_to_soup(self.PREFIX)
-        mag = soup.find('li', attrs={'class': 'magazine'})
-        if mag:
-            issue = mag.a['href']
-        return issue