mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
The Saturday paper by Alistair Francis
Merge branch 'alistair/saturdaypaper' of https://github.com/alistair23/calibre
This commit is contained in:
commit
2f998e0e8e
69
recipes/the_saturday_paper.recipe
Normal file
69
recipes/the_saturday_paper.recipe
Normal file
@ -0,0 +1,69 @@
|
||||
#!/usr/bin/env python
|
||||
# vim:fileencoding=utf-8
|
||||
# License: GPLv3 Copyright: 2021, Alistair Francis <alistair@alistair23.me>
|
||||
|
||||
from __future__ import absolute_import, division, print_function, unicode_literals
|
||||
from calibre.web.feeds.recipes import BasicNewsRecipe
|
||||
|
||||
|
||||
def classes(classes):
    """Build a BeautifulSoup ``attrs`` matcher for a set of CSS class names.

    ``classes`` is a single-space separated string of class names. The
    returned dict has one key, ``attrs``, mapping ``class`` to a predicate.
    The predicate receives a tag's ``class`` attribute value and yields a
    truthy result when that value shares at least one name with the
    requested set.
    """
    wanted = frozenset(classes.split(' '))

    def shares_wanted_class(value):
        # Tags without a class attribute (None / empty string) never match;
        # hand the falsy value straight back.
        if not value:
            return value
        return wanted.intersection(value.split())

    return {'attrs': {'class': shares_wanted_class}}
|
||||
|
||||
|
||||
class SaturdayPaper(BasicNewsRecipe):
    """Recipe that builds an issue of The Saturday Paper from its section pages."""

    title = 'The Saturday Paper'
    __author__ = 'Alistair Francis'
    description = 'The Saturday Paper is a weekly newspaper, dedicated to narrative journalism. It offers the biggest names and best writing in news, culture, and analysis, with a particular focus on Australia. '
    language = 'en_AU'
    no_stylesheets = True
    remove_javascript = True
    encoding = 'utf-8'

    # Site origin, prepended to root-relative article links.
    _BASE_URL = 'https://www.thesaturdaypaper.com.au'

    # Tags on article pages selected by CSS class: body, text, headline, image.
    keep_only_tags = [
        classes('article-page__content article__text article-page__title article-page__image')
    ]
    # Tags on article pages dropped by CSS class: share icons and sidebar.
    remove_tags = [
        classes('social-icons-article-bottom-container article-page__sidebar')
    ]

    def parse_index(self):
        """Scrape each section page and collect its article teasers.

        Returns a list of ``(section_title, articles)`` tuples, where each
        article is a dict with ``title``, ``url`` and ``description`` keys.
        Teasers without a usable link are skipped, and sections that yield
        no articles are left out of the result.
        """
        feeds = [
            ('News', 'https://www.thesaturdaypaper.com.au/news'),
            ('Opinion', 'https://www.thesaturdaypaper.com.au/opinion'),
            ('Culture', 'https://www.thesaturdaypaper.com.au/culture'),
            ('Life', 'https://www.thesaturdaypaper.com.au/life'),
            ('Food', 'https://www.thesaturdaypaper.com.au/food/latest'),
            ('Puzzles', 'https://www.thesaturdaypaper.com.au/puzzles'),
            ('Sport', 'https://www.thesaturdaypaper.com.au/sport'),
        ]

        articles = []

        for feed, link in feeds:
            soup = self.index_to_soup(link)
            news = []

            for a in soup.findAll(**classes('article')):
                # Check the link first: a teaser with no link tag, or a link
                # tag with no href, cannot be downloaded.
                anchor = a.find(class_="article__title_link")
                if anchor is None:
                    continue
                url = anchor.get('href')
                if not url:
                    continue
                if url.startswith('/'):
                    url = self._BASE_URL + url

                title = self.tag_to_string(a.find(class_='article__title'))
                desc = self.tag_to_string(a.find(class_='article__text'))

                self.log(title, ' at ', url)

                news.append({'title': title, 'url': url, 'description': desc})

            # An overridden parse_index has to drop empty sections itself.
            if news:
                articles.append((feed, news))

        return articles
|
||||
|
Loading…
x
Reference in New Issue
Block a user