Update The Baffler

This commit is contained in:
Kovid Goyal 2023-08-20 09:09:54 +05:30
parent 60d44baaf5
commit 4d1cd721b0
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@@ -3,7 +3,7 @@ from calibre.web.feeds.news import BasicNewsRecipe, classes
class TheBaffler(BasicNewsRecipe): class TheBaffler(BasicNewsRecipe):
title = 'The Baffler' title = 'The Baffler'
__author__ = 'unkn0wn' __author__ = 'flobotnik and unkn0wn'
description = ('This magazine contains left-wing criticism, cultural analysis, shorts' description = ('This magazine contains left-wing criticism, cultural analysis, shorts'
' stories, poems and art. They publish six print issues annually.') ' stories, poems and art. They publish six print issues annually.')
language = 'en' language = 'en'
@@ -26,8 +26,9 @@ class TheBaffler(BasicNewsRecipe):
] ]
remove_tags = [ remove_tags = [
classes('entry-date issue-number-segment single-article-vertical donation-footer'), classes('entry-date issue-number-segment single-article-vertical donation-footer story-footer ml-4 mt-14'),
dict(name='footer') dict(name='footer'),
dict(name='a', class_='ml-4 pr-px font-sans text-sm lg:text-xs whitespace-nowrap')
] ]
def get_cover_url(self): def get_cover_url(self):
@@ -40,7 +41,7 @@ class TheBaffler(BasicNewsRecipe):
def parse_index(self): def parse_index(self):
soup = self.index_to_soup('https://thebaffler.com/issues') soup = self.index_to_soup('https://thebaffler.com/issues')
issue = soup.find('article') issue = soup.find('article')
edition = self.tag_to_string(issue.find('h3')).strip().split('—')[1] edition = self.tag_to_string(issue.find('h2')).strip().split('—')[1]
if edition: if edition:
self.log('Downloading Issue: ', edition) self.log('Downloading Issue: ', edition)
self.title = 'The Baffler : ' + edition self.title = 'The Baffler : ' + edition
@@ -56,14 +57,14 @@ class TheBaffler(BasicNewsRecipe):
ans = [] ans = []
main = soup.find('main', attrs={'id':'main'}) main = soup.find('main', attrs={'id':'main'})
for section in main.findAll('section'): for section in main.findAll('section'):
current_section = self.tag_to_string(section.h1).strip() current_section = self.tag_to_string(section.h3).strip()
self.log(current_section) self.log(current_section)
articles = [] articles = []
for h3 in section.findAll('h3'): for h4 in section.findAll('h4'):
title = self.tag_to_string(h3) title = self.tag_to_string(h4)
url = h3.a['href'] url = h4.a['href']
desc = '' desc = ''
span = h3.findNext('span') span = h4.findNext('span')
if span: if span:
desc = self.tag_to_string(span).strip() desc = self.tag_to_string(span).strip()
span2 = span.findNext('span') span2 = span.findNext('span')