Update The Baffler

2025-07-09 03:04:10 -04:00 · 2022-09-04 14:32:13 +05:30 · 2022-09-04 14:32:13 +05:30 · 92bef3ec5e
commit 92bef3ec5e
parent 0d22b5f3ef
1 changed file with 57 additions and 40 deletions
--- a/recipes/the_baffler.recipe
+++ b/recipes/the_baffler.recipe
@@ -1,68 +1,85 @@
-from calibre.web.feeds.recipes import BasicNewsRecipe
+from calibre.web.feeds.news import BasicNewsRecipe, classes
 import re
 def classes(classes):
    """Build a soup-matching spec that selects tags by CSS class.

    ``classes`` is a space-separated string of class names.  The returned
    dict has a single ``attrs`` key whose ``class`` entry is a predicate:
    given a tag's class attribute value, it yields the (truthy) set of
    names shared with ``classes``, or a falsy value when nothing matches
    or the attribute is empty/missing.
    """
    wanted = frozenset(classes.split(' '))

    def shares_a_class(value):
        # Short-circuit on None / '' so that .split() is never called on them.
        return value and frozenset(value.split()).intersection(wanted)

    return {'attrs': {'class': shares_a_class}}
 class TheBaffler(BasicNewsRecipe):
    """Recipe that downloads the latest issue of The Baffler.

    The issue listing at https://thebaffler.com/issues is read to find the
    newest issue; that issue's page is then walked section by section to
    build the article index.  The cover image is looked up separately on
    the Exact Editions shop page.
    """

    title = 'The Baffler'
    __author__ = 'unkn0wn'
    description = ('This magazine contains left-wing criticism, cultural analysis, shorts'
                   ' stories, poems and art.  They publish six print issues annually.')
    language = 'en'
    encoding = 'UTF-8'
    no_javascript = True
    no_stylesheets = True
    remove_attributes = ['style', 'height', 'width']

    extra_css = '''
        .entry-subtitle{color:#202020; font-style:italic; text-align:left;}
        blockquote{color:gray;}
        em{color:#404040;}
        .wp-caption-text{font-size:small; text-align:center;}
        .lg:text-xs{color:gray; font-size:small; text-align:center;}
        .author-meta{font-size:small; color:gray;}
    '''

    # Only the <main id="main"> element of each article page is kept.
    keep_only_tags = [
        dict(name='main', attrs={'id': 'main'})
    ]
    remove_tags = [
        classes('entry-date issue-number-segment single-article-vertical donation-footer'),
        dict(name='footer')
    ]

    def get_cover_url(self):
        """Return the cover image URL, or the existing ``cover_url`` if none is found.

        The first ``<div class="row">`` of the Exact Editions shop page is
        searched for an ``<img>``; when one with a ``src`` is present it is
        stored on ``self.cover_url``.  If the attribute was never set,
        ``None`` is returned instead of raising.
        """
        soup = self.index_to_soup('https://shop.exacteditions.com/us/the-baffler')
        tag = soup.find('div', attrs={'class': 'row'})
        img = tag.find('img') if tag is not None else None
        if img is not None and img.get('src'):
            self.cover_url = img['src']
        return getattr(self, 'cover_url', None)

    def parse_index(self):
        """Build the article index for the newest issue.

        Side effects: may set ``self.title`` (when the issue heading
        contains an em dash), and always sets ``self.timefmt`` and prepends
        the issue link text to ``self.description``.

        Returns a list of ``(section_title, articles)`` tuples, where each
        article is a dict with ``title``, ``url`` and ``description`` keys.
        Sections without any linked article are omitted.
        """
        soup = self.index_to_soup('https://thebaffler.com/issues')
        # The first <article> on the listing page is used as the issue to
        # download (presumably the most recent one; confirm against the site).
        issue = soup.find('article')

        # The heading is split on an em dash and the second piece is used as
        # the edition name; a heading without a dash yields no edition rather
        # than an IndexError.
        heading_parts = self.tag_to_string(issue.find('h3')).strip().split('—')
        edition = heading_parts[1] if len(heading_parts) > 1 else ''
        if edition:
            self.log('Downloading Issue: ', edition)
            self.title = 'The Baffler : ' + edition

        self.timefmt = ' [' + self.tag_to_string(issue.find('div', **classes('font-lion'))).strip() + ']'

        # Prepend the text of the issue link to the recipe description.
        a = issue.find('a')
        self.description = (
            u'\n\n' + self.tag_to_string(a).strip() +
            u'\n\n' + self.description
        )

        soup = self.index_to_soup(a['href'])
        ans = []

        main = soup.find('main', attrs={'id': 'main'})
        for section in main.findAll('section'):
            current_section = self.tag_to_string(section.h1).strip()
            self.log(current_section)
            articles = []
            for h3 in section.findAll('h3'):
                link = h3.a
                if link is None:
                    # A heading without a link gives no URL to fetch.
                    continue
                title = self.tag_to_string(h3)
                url = link['href']
                articles.append({
                    'title': title,
                    'url': url,
                    'description': self._describe(h3, title, url),
                })
            if articles:
                ans.append((current_section, articles))
        return ans

    def _describe(self, h3, title, url):
        """Assemble and log the description for the article headed by ``h3``."""
        desc = ''
        # NOTE(review): the two <span> elements following the heading are
        # presumably the summary and the byline; confirm against the markup.
        span = h3.findNext('span')
        if span is not None:
            desc = self.tag_to_string(span).strip()
            # Only look for a second span when the first exists; otherwise
            # the lookup would be attempted on None.
            span2 = span.findNext('span')
            if span2 is not None:
                desc = self.tag_to_string(span2).strip() + ' | ' + desc
        self.log('\t', title, '\n\t', desc, '\n\t\t', url)
        return desc

    def preprocess_html(self, soup):
        """Promote title and section-heading markup to real heading tags.

        The first ``<div>`` with class ``entry-title`` becomes an ``<h1>``,
        and every ``<p class="parasectionhed">`` becomes an ``<h4>``.  The
        same soup object is modified in place and returned.
        """
        div = soup.find('div', **classes('entry-title'))
        if div:
            div.name = 'h1'
        for p in soup.findAll('p', attrs={'class': 'parasectionhed'}):
            p.name = 'h4'
        return soup