calibre/recipes/sports_illustrated.recipe
Kovid Goyal 29cd8d64ea
Change shebangs to python from python2
Also remove a few other miscellaneous references to python2
2020-08-22 18:47:51 +05:30

72 lines
2.2 KiB
Python

#!/usr/bin/env python
# vim:fileencoding=utf-8
# License: GPLv3 Copyright: 2018, Kovid Goyal <kovid at kovidgoyal.net>
from __future__ import absolute_import, division, print_function, unicode_literals
from calibre.web.feeds.news import BasicNewsRecipe
def absolutize(href):
    """Return ``href`` as an absolute URL for the Sports Illustrated site.

    * Protocol-relative links (``//host/path``) only get the ``https:``
      scheme prepended, since they already name their own host.
    * Root-relative links (``/path``) are prefixed with
      ``https://www.si.com``.
    * Anything else is returned unchanged.
    """
    if href.startswith('//'):
        # Must be tested before the single-slash case: prefixing the site
        # root here would yield ``https://www.si.com//host/path``.
        return 'https:' + href
    if href.startswith('/'):
        return 'https://www.si.com' + href
    return href
def classes(classes):
    """Build a tag matcher for a space separated list of CSS class names.

    The result is a dict of the form ``{'attrs': {'class': predicate}}``,
    as used for the entries of ``keep_only_tags`` and ``remove_tags`` in
    this file. ``predicate`` receives the value of a ``class`` attribute
    and is truthy when that value shares at least one name with
    ``classes``.
    """
    wanted = frozenset(classes.split(' '))

    def shares_a_class(value):
        # A missing or empty class attribute is handed back as-is (falsy).
        if not value:
            return value
        # Otherwise return the (possibly empty) set of names in common.
        return frozenset(value.split()).intersection(wanted)

    return {'attrs': {'class': shares_a_class}}
class SI(BasicNewsRecipe):
    """Recipe that collects the articles linked from the si.com home page."""

    title = 'Sports Illustrated'
    __author__ = 'Kovid Goyal'
    language = 'en'
    encoding = 'utf-8'
    ignore_duplicate_articles = {'title', 'url'}
    no_stylesheets = True
    compress_news_images = True
    compress_news_images_auto_size = 5
    remove_attributes = ['style']

    # Only elements carrying one of these classes are kept.
    keep_only_tags = [
        classes('m-detail-header m-detail--body'),
    ]
    # Elements matched here are removed (by class, then by tag name).
    remove_tags = [
        classes('media-video OUTBRAIN m-in-content-ad-row'),
        dict(name=['button', 'meta', 'source']),
    ]

    def get_browser(self, *args, **kwargs):
        """Return the base class browser with an ``Accept-Language: en``
        entry appended to its headers."""
        br = BasicNewsRecipe.get_browser(self, *args, **kwargs)
        br.addheaders += [('Accept-Language', 'en')]
        return br

    def preprocess_html(self, soup, *a):
        """Turn every element that has a ``data-src`` attribute into an
        ``<img>`` whose ``src`` is that value, then return ``soup``.

        The element's children are dropped and its ``height`` and ``width``
        attributes are set to the empty string.
        """
        # NOTE(review): presumably these are lazy-loaded image placeholders
        # -- confirm against the live markup.
        for tag in soup.findAll(attrs={'data-src': True}):
            tag.name = 'img'
            del tag.contents[:]
            tag['src'] = tag['data-src']
            tag['height'] = tag['width'] = ''
        return soup

    def parse_index(self):
        """Group the home page's ``phoenix-super-link`` elements by category.

        Returns a list of ``(category, articles)`` tuples sorted
        case-insensitively by category, where ``articles`` is a list of
        ``{'title': ..., 'url': ...}`` dicts in page order. Links without a
        child carrying ``phx-track-id="Label"`` are filed under
        ``'Features'``. Links without a usable ``href`` are skipped.
        """
        soup = self.index_to_soup('https://www.si.com/')
        cats = {}
        for ps in soup.findAll('phoenix-super-link'):
            title = self.tag_to_string(ps.find('h2'))
            # ``ps['href']`` would raise KeyError on a link with no href and
            # abort the whole download; skip that single entry instead.
            href = ps.get('href')
            if not href:
                self.log('Skipping link without href:', title)
                continue
            label = ps.find(attrs={'phx-track-id': 'Label'})
            category = self.tag_to_string(label) if label is not None else 'Features'
            cats.setdefault(category, []).append(
                {'title': title, 'url': absolutize(href)})
            self.log('Found article', title)
        return [(key, cats[key]) for key in sorted(cats, key=lambda x: x.lower())]