mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-08 10:44:09 -04:00
Update Sports Illustrated
This commit is contained in:
parent
14c8099abb
commit
bacb0b78a8
@ -7,6 +7,12 @@ from __future__ import absolute_import, division, print_function, unicode_litera
|
|||||||
from calibre.web.feeds.news import BasicNewsRecipe
|
from calibre.web.feeds.news import BasicNewsRecipe
|
||||||
|
|
||||||
|
|
||||||
|
def absolutize(href):
    '''
    Turn a link target into an absolute https URL.

    :param href: The link target as a string, e.g. the value of an
        ``href`` attribute.
    :return: ``href`` with the scheme filled in when it is
        protocol-relative (``//host/path``), with ``https://www.si.com``
        prefixed when it is site-relative (``/path``), and unchanged in
        every other case.
    '''
    if href.startswith('//'):
        # A protocol-relative link already names its host and only lacks a
        # scheme. It must be tested before the single-slash case, because
        # it also starts with '/' and would otherwise be glued onto the
        # site root, producing a bogus URL.
        return 'https:' + href
    if href.startswith('/'):
        return 'https://www.si.com' + href
    return href
|
||||||
|
|
||||||
|
|
||||||
def classes(classes):
|
def classes(classes):
|
||||||
q = frozenset(classes.split(' '))
|
q = frozenset(classes.split(' '))
|
||||||
return dict(attrs={
|
return dict(attrs={
|
||||||
@ -25,11 +31,11 @@ class SI(BasicNewsRecipe):
|
|||||||
remove_attributes = ['style']
|
remove_attributes = ['style']
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
classes('headline article-content'),
|
classes('m-detail-header m-detail--body'),
|
||||||
]
|
]
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
classes('media-video OUTBRAIN'),
|
classes('media-video OUTBRAIN m-in-content-ad-row'),
|
||||||
dict(name='meta'),
|
dict(name=['button', 'meta', 'source']),
|
||||||
]
|
]
|
||||||
|
|
||||||
def get_browser(self, *args, **kwargs):
|
def get_browser(self, *args, **kwargs):
|
||||||
@ -42,20 +48,24 @@ class SI(BasicNewsRecipe):
|
|||||||
tag.name = 'img'
|
tag.name = 'img'
|
||||||
del tag.contents[:]
|
del tag.contents[:]
|
||||||
tag['src'] = tag['data-src']
|
tag['src'] = tag['data-src']
|
||||||
|
tag['height'] = tag['width'] = ''
|
||||||
return soup
|
return soup
|
||||||
|
|
||||||
def parse_index(self):
    '''
    Build the article index by scraping the www.si.com front page.

    Every ``phoenix-super-link`` element on the page is treated as one
    article: its ``<h2>`` supplies the title, the descendant carrying
    ``phx-track-id="Label"`` supplies the section name (falling back to
    ``'Features'``), and its ``href`` supplies the article URL.

    :return: A list of ``(section name, articles)`` tuples ordered
        case-insensitively by section name, where ``articles`` is a list
        of ``{'title': ..., 'url': ...}`` dicts in page order.
    '''
    soup = self.index_to_soup('https://www.si.com/')
    cats = {}
    for ps in soup.findAll('phoenix-super-link'):
        href = ps.get('href')
        if not href:
            # Without a target there is nothing to download; indexing
            # ps['href'] here would raise KeyError and abort the whole
            # index over a single malformed element.
            continue
        title = self.tag_to_string(ps.find('h2'))
        label = ps.find(attrs={'phx-track-id': 'Label'})
        category = self.tag_to_string(label) if label is not None else ''
        # A label element with no text must not create a nameless section.
        category = category.strip() or 'Features'
        cats.setdefault(category, []).append(
            {'title': title, 'url': absolutize(href)})
        self.log('Found article', title)
    return [
        (name, cats[name])
        for name in sorted(cats, key=lambda x: x.lower())
    ]
|
||||||
|
Loading…
x
Reference in New Issue
Block a user