mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-08-05 08:40:13 -04:00
The Baffler by Jose Ortiz
This commit is contained in:
parent
85fbb87b82
commit
ba5f3cdce7
68
recipes/the_baffler.recipe
Normal file
68
recipes/the_baffler.recipe
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
|
def classes(classes):
|
||||||
|
q = frozenset(classes.split(' '))
|
||||||
|
return dict(
|
||||||
|
attrs={'class': lambda x: x and frozenset(x.split()).intersection(q)}
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
class TheBaffler(BasicNewsRecipe):
|
||||||
|
|
||||||
|
title = 'The Baffler'
|
||||||
|
__author__ = 'Jose Ortiz'
|
||||||
|
description = ('This magazine contains left-wing criticism, cultural analysis, shorts'
|
||||||
|
' stories, poems and art. They publish six print issues annually.')
|
||||||
|
language = 'en_US'
|
||||||
|
encoding = 'UTF-8'
|
||||||
|
no_javascript = True
|
||||||
|
no_stylesheets = True
|
||||||
|
|
||||||
|
keep_only_tags = [
|
||||||
|
classes('header-contain entry-content')
|
||||||
|
]
|
||||||
|
|
||||||
|
def parse_index(self):
|
||||||
|
soup = self.index_to_soup('https://thebaffler.com/issues').main.article
|
||||||
|
self.timefmt = ' [%s]' % self.tag_to_string(soup.find(**classes('date'))).strip()
|
||||||
|
try:
|
||||||
|
self.cover_url = re.sub(
|
||||||
|
r'.*?url\((.*?)\).*', r'\1',
|
||||||
|
soup.find(**classes('image-fill'))['style']).strip()
|
||||||
|
self.log('cover_url at ', self.cover_url)
|
||||||
|
except:
|
||||||
|
self.log.error('Failed to download cover_url')
|
||||||
|
|
||||||
|
soup = self.index_to_soup(soup.a['href'])
|
||||||
|
|
||||||
|
# Extract comments from `.entry-content' and prepend to self.description
|
||||||
|
self.description = (
|
||||||
|
u'\n\n' + self.tag_to_string(soup.find(**classes('entry-content'))) +
|
||||||
|
u'\n\n' + self.description
|
||||||
|
)
|
||||||
|
|
||||||
|
ans = []
|
||||||
|
|
||||||
|
# Articles at `.contents section .meta'
|
||||||
|
for section in soup.find(**classes('contents'))('section'):
|
||||||
|
current_section = self.tag_to_string(section.h2)
|
||||||
|
self.log(current_section)
|
||||||
|
articles = []
|
||||||
|
for div in section(**classes('meta')):
|
||||||
|
# Getting articles
|
||||||
|
a = div.find(**classes('title')).a
|
||||||
|
title = self.tag_to_string(a)
|
||||||
|
url = a['href']
|
||||||
|
self.log('\t', title, ' at ', url)
|
||||||
|
desc = ''
|
||||||
|
r = div.find(**classes('deck'))
|
||||||
|
if r is not None:
|
||||||
|
desc = self.tag_to_string(r)
|
||||||
|
articles.append(
|
||||||
|
{'title': title, 'url': url, 'description': desc})
|
||||||
|
if current_section and articles:
|
||||||
|
ans.append((current_section,articles))
|
||||||
|
|
||||||
|
return ans
|
Loading…
x
Reference in New Issue
Block a user