from calibre.web.feeds.news import BasicNewsRecipe, classes


class TheBaffler(BasicNewsRecipe):
    '''
    Calibre news recipe for The Baffler (https://thebaffler.com), a
    left-wing magazine publishing six print issues annually.  Downloads
    the latest issue listed on the /issues page.
    '''
    title = 'The Baffler'
    __author__ = 'unkn0wn'
    description = ('This magazine contains left-wing criticism, cultural analysis, short'
                   ' stories, poems and art. They publish six print issues annually.')
    language = 'en'
    encoding = 'UTF-8'
    no_javascript = True
    no_stylesheets = True
    remove_attributes = ['style', 'height', 'width']

    # NOTE: 'text-align' (was the invalid 'text:align'); the colon in the
    # Tailwind-style class name 'lg:text-xs' must be escaped as '\:' or the
    # selector is parsed as a bogus pseudo-class and the rule is dropped.
    extra_css = '''
        .entry-subtitle{color:#202020; font-style:italic; text-align:left;}
        blockquote{color:gray;}
        em{color:#404040;}
        .wp-caption-text{font-size:small; text-align:center;}
        .lg\\:text-xs{color:gray; font-size:small; text-align:center;}
        .author-meta{font-size:small; color:gray;}
    '''

    keep_only_tags = [
        dict(name='main', attrs={'id': 'main'})
    ]

    remove_tags = [
        classes('entry-date issue-number-segment single-article-vertical donation-footer'),
        dict(name='footer')
    ]

    def get_cover_url(self):
        # The magazine's own site has no stable cover image, so scrape the
        # cover thumbnail from the Exact Editions shop listing instead.
        soup = self.index_to_soup('https://shop.exacteditions.com/us/the-baffler')
        tag = soup.find('div', attrs={'class': 'row'})
        if tag:
            self.cover_url = tag.find('img')['src']
        # getattr(self, 'cover_url', self.cover_url) was a no-op: the default
        # argument is evaluated eagerly, so just return the attribute
        # (BasicNewsRecipe provides cover_url = None as a class default).
        return self.cover_url

    def parse_index(self):
        soup = self.index_to_soup('https://thebaffler.com/issues')
        # The first <article> on the issues page is the newest issue.
        issue = soup.find('article')
        # Heading looks like 'no. 64 — Title'; the text after the em-dash is
        # the issue name.  Guard against a heading without an em-dash, which
        # previously raised IndexError.
        parts = self.tag_to_string(issue.find('h3')).strip().split('—')
        edition = parts[1] if len(parts) > 1 else ''
        if edition:
            self.log('Downloading Issue: ', edition)
            self.title = 'The Baffler : ' + edition
        self.timefmt = ' [' + self.tag_to_string(issue.find('div', **classes('font-lion'))).strip() + ']'
        a = issue.find('a')
        # Prepend the issue blurb to the static recipe description.
        self.description = (
            u'\n\n' + self.tag_to_string(a).strip() +
            u'\n\n' + self.description
        )
        soup = self.index_to_soup(a['href'])
        ans = []
        main = soup.find('main', attrs={'id': 'main'})
        for section in main.findAll('section'):
            current_section = self.tag_to_string(section.h1).strip()
            self.log(current_section)
            articles = []
            for h3 in section.findAll('h3'):
                title = self.tag_to_string(h3)
                url = h3.a['href']
                desc = ''
                # First following <span> is the deck; the one after it (when
                # present) is the author byline, shown as 'author | deck'.
                span = h3.findNext('span')
                if span:
                    desc = self.tag_to_string(span).strip()
                    span2 = span.findNext('span')
                    if span2:
                        desc = self.tag_to_string(span2).strip() + ' | ' + desc
                self.log('\t', title, '\n\t', desc, '\n\t\t', url)
                articles.append(
                    {'title': title, 'url': url, 'description': desc})
            if articles:
                ans.append((current_section, articles))
        return ans

    def preprocess_html(self, soup):
        # Promote the article title div to a real <h1> and the in-body
        # section heads to <h4> so the ebook conversion styles them.
        div = soup.find('div', **classes('entry-title'))
        if div:
            div.name = 'h1'
        for p in soup.findAll('p', attrs={'class': 'parasectionhed'}):
            p.name = 'h4'
        return soup