mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Create first_things
Recipe for First Things magazine.
This commit is contained in:
parent
cfa133d3e4
commit
488ae08a41
67
recipes/first_things
Normal file
67
recipes/first_things
Normal file
@ -0,0 +1,67 @@
|
||||
#!/usr/bin/env python2
|
||||
from __future__ import unicode_literals
|
||||
__license__ = 'GPL v3'
|
||||
__copyright__ = '2017, John Hutson <jfhutson at gmail.com>'
|
||||
'''
|
||||
firstthings.com
|
||||
'''
|
||||
import html5lib
|
||||
from lxml import html
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
class FirstThings(BasicNewsRecipe):
|
||||
|
||||
title = 'First Things'
|
||||
__author__ = 'John Hutson'
|
||||
description = 'America\'s Most Influential Journal of Religion and Public Life'
|
||||
INDEX = 'https://www.firstthings.com/current-edition'
|
||||
language = 'en'
|
||||
encoding = 'utf-8'
|
||||
|
||||
no_stylesheets = True
|
||||
|
||||
keep_only_tags = [
|
||||
dict(name='h1'),
|
||||
dict(attrs={'itemprop': ['author',]}),
|
||||
dict(attrs={'itemprop': 'articleBody'}),
|
||||
]
|
||||
|
||||
extra_css = '''
|
||||
.small-caps { font-variant: small-caps }
|
||||
.drop-cap { float: left; font-size: 75px; line-height: 60px; padding-top: 4px; padding-right: 8px; padding-left: 3px;}
|
||||
'''
|
||||
|
||||
def preprocess_raw_html(self, raw, url):
|
||||
return html.tostring(html5lib.parse(raw, treebuilder='lxml', namespaceHTMLElements=False), method='html', encoding=unicode)
|
||||
|
||||
def parse_index(self):
|
||||
soup = self.index_to_soup(self.INDEX)
|
||||
cover = soup.find('a', 'cover-link')
|
||||
if cover is not None:
|
||||
img = cover
|
||||
if img:
|
||||
self.cover_url = img['href']
|
||||
current_section, current_articles = 'Cover Story', []
|
||||
feeds = []
|
||||
for div in soup.findAll(['h3', 'h4', 'a']):
|
||||
if div.name == 'h3':
|
||||
if current_articles:
|
||||
feeds.append((current_section, current_articles))
|
||||
current_articles = []
|
||||
current_section = self.tag_to_string(div)
|
||||
self.log('\nFound section:', current_section)
|
||||
elif div.name == 'h4':
|
||||
a = div.findChild('a')
|
||||
title = self.tag_to_string(a)
|
||||
url = a['href']
|
||||
desc = ''
|
||||
if url.startswith('/'):
|
||||
url = 'https://www.firstthings.com/' + url
|
||||
elif div.name == 'a' and div.rel == 'author':
|
||||
desc = self.tag_to_string(div)
|
||||
current_articles.append(
|
||||
{'title': title, 'url': url, 'description': desc})
|
||||
|
||||
if current_articles:
|
||||
feeds.append((current_section, current_articles))
|
||||
return feeds
|
Loading…
x
Reference in New Issue
Block a user