mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-09-29 15:31:08 -04:00
81 lines
2.8 KiB
Python
81 lines
2.8 KiB
Python
#!/usr/bin/env python
|
|
# vim:fileencoding=utf-8
|
|
# License: GPLv3 Copyright: 2021, Alistair Francis <alistair@alistair23.me>
|
|
|
|
from __future__ import absolute_import, division, print_function, unicode_literals
|
|
|
|
from calibre.web.feeds.recipes import BasicNewsRecipe
|
|
|
|
|
|
def classes(classes):
    """Build a tag-search spec matching any of the given CSS class names.

    ``classes`` is a space-separated string of class names. The result is a
    dict of the form ``{'attrs': {'class': predicate}}``. The predicate
    receives a tag's ``class`` attribute value: a falsy value (``None`` or
    ``''``) is returned unchanged, otherwise the frozenset of names shared
    between the attribute and the wanted names is returned (empty, hence
    falsy, when there is no overlap).
    """
    wanted = frozenset(classes.split(' '))

    def shares_a_class(value):
        # ``and`` short-circuits so a missing/empty attribute is passed
        # straight back instead of being split.
        return value and frozenset(value.split()).intersection(wanted)

    return {'attrs': {'class': shares_a_class}}
|
|
|
|
|
|
class SaturdayPaper(BasicNewsRecipe):
    """Calibre news recipe for The Saturday Paper.

    The cover is taken from the first ``div.article__image`` on the editions
    page (``get_cover_url``) and the article list is scraped from a fixed set
    of section listing pages (``parse_index``).
    """

    title = 'The Saturday Paper'
    __author__ = 'Alistair Francis'
    description = (
        'The Saturday Paper is a weekly newspaper, dedicated to narrative journalism.'
        ' It offers the biggest names and best writing in news, culture, and analysis, with a particular focus on Australia. ')
    language = 'en_AU'
    no_stylesheets = True
    remove_javascript = True
    encoding = 'utf-8'

    # Tag specs (built by the module-level ``classes`` helper) selecting the
    # parts of an article page to keep.
    keep_only_tags = [
        classes('article-page__content article__text article__image article-page__title article-page__image')
    ]
    # Tag specs for sharing widgets, sidebar and footer to strip out.
    remove_tags = [
        classes('social-icons-article-top-container social-icons-article-bottom-container'
                ' article-page__sidebar article-page__social__icons share-wrapper article-footer-container')
    ]
    # Tag spec for the ``div.end-matter`` element that closes an article.
    remove_tags_after = [
        {'name': 'div', 'class': 'end-matter'},
    ]

    def _absolute_url(self, url):
        """Return ``url`` with the site root prepended when it starts with '/'."""
        if url.startswith('/'):
            return 'https://www.thesaturdaypaper.com.au' + url
        return url

    def get_cover_url(self):
        """Return the cover image URL from the editions page, or ``None``.

        ``None`` is returned (instead of raising) when the page has no
        ``div.article__image``, that div has no ``<img>``, or the image has
        no ``src`` attribute.
        """
        soup = self.index_to_soup('https://www.thesaturdaypaper.com.au/editions/')
        div = soup.find('div', attrs={'class': 'article__image'})
        img = div.img if div is not None else None
        src = img.get('src') if img is not None else None
        if not src:
            self.log('No cover image found on the editions page')
            return None
        # Article links are made absolute in parse_index; treat the cover
        # image address the same way.
        return self._absolute_url(src)

    def parse_index(self):
        """Scrape every section page and return ``[(section, [article, ...])]``.

        Each article is a dict with ``title``, ``url`` and ``description``
        keys. Entries without a usable title link are skipped.
        """
        feeds = [
            ('News', 'https://www.thesaturdaypaper.com.au/news'),
            ('Opinion', 'https://www.thesaturdaypaper.com.au/opinion'),
            ('Culture', 'https://www.thesaturdaypaper.com.au/culture'),
            ('Life', 'https://www.thesaturdaypaper.com.au/life'),
            ('Food', 'https://www.thesaturdaypaper.com.au/food/latest'),
            ('Puzzles', 'https://www.thesaturdaypaper.com.au/puzzles'),
            ('Sport', 'https://www.thesaturdaypaper.com.au/sport'),
        ]

        articles = []

        for feed, link in feeds:
            soup = self.index_to_soup(link)
            news = []

            for article in soup.findAll(**classes('article')):
                title = self.tag_to_string(article.find(class_='article__title'))

                anchor = article.find(class_='article__title_link')
                if anchor is None:
                    continue
                # .get() rather than ['href']: an anchor without an href
                # cannot be downloaded, so skip it instead of raising KeyError.
                href = anchor.get('href')
                if not href:
                    continue
                url = self._absolute_url(href)

                desc = self.tag_to_string(article.find(class_='article__text'))

                self.log(title, ' at ', url)

                news.append({'title': title, 'url': url, 'description': desc})

            articles.append((feed, news))

        return articles
|