Update Miami Herald

Fixes #1906178 [[Enhancement] Fetch news: 30+ McClatchy newspapers](https://bugs.launchpad.net/calibre/+bug/1906178)
This commit is contained in:
Kovid Goyal 2020-12-27 13:38:22 +05:30
parent c3e9853221
commit 0c678a1dc4
No known key found for this signature in database
GPG Key ID: 06BC317B515ACE7C

View File

@ -8,6 +8,12 @@ miamiherald.com
from calibre.web.feeds.news import BasicNewsRecipe from calibre.web.feeds.news import BasicNewsRecipe
def classes(classes):
    """Build a tag-matching spec keyed on the HTML ``class`` attribute.

    ``classes`` is a space-separated string of class names. The returned
    dict has the shape ``{'attrs': {'class': predicate}}``; the predicate
    takes a tag's class attribute string and yields the (possibly empty)
    frozenset of names it shares with ``classes``.
    """
    wanted = frozenset(classes.split(' '))

    def shared_names(value):
        # A missing or empty class attribute is handed back unchanged
        # (None or ''), which is falsy and therefore a non-match.
        if not value:
            return value
        return frozenset(value.split()).intersection(wanted)

    return {'attrs': {'class': shared_names}}
class TheMiamiHerald(BasicNewsRecipe): class TheMiamiHerald(BasicNewsRecipe):
title = 'The Miami Herald' title = 'The Miami Herald'
__author__ = 'Kovid Goyal' __author__ = 'Kovid Goyal'
@ -25,9 +31,10 @@ class TheMiamiHerald(BasicNewsRecipe):
} }
keep_only_tags = [ keep_only_tags = [
classes('story-body')
] ]
remove_tags = [ remove_tags = [
classes('social-network-macro social-media')
] ]
feeds = [ feeds = [
@ -47,3 +54,19 @@ class TheMiamiHerald(BasicNewsRecipe):
(u'Environment', u'https://www.miamiherald.com/news/local/environment/?widgetName=rssfeed&widgetContentId=712015&getXmlFeed=true'), (u'Environment', u'https://www.miamiherald.com/news/local/environment/?widgetName=rssfeed&widgetContentId=712015&getXmlFeed=true'),
(u'Sports', u'https://www.miamiherald.com/sports/?widgetName=rssfeed&widgetContentId=712015&getXmlFeed=true'), (u'Sports', u'https://www.miamiherald.com/sports/?widgetName=rssfeed&widgetContentId=712015&getXmlFeed=true'),
] ]
def get_browser(self, *a, **kw):
    """Return the base recipe browser with its User-Agent header removed.

    McClatchy servers don't like the User-Agent header: they hang forever
    when it is present, so it is dropped from the default headers.
    """
    browser = BasicNewsRecipe.get_browser(self, *a, **kw)
    kept_headers = []
    for header in browser.addheaders:
        # Header names are compared case-insensitively.
        if header[0].lower() == 'user-agent':
            continue
        kept_headers.append(header)
    browser.addheaders = kept_headers
    return browser
def preprocess_html(self, soup):
    """Collapse every ``<picture>`` element down to its plain ``<img>``.

    For each ``<picture>``, the URL of the first ``<source>`` candidate is
    copied into the ``src`` attribute of the picture's ``<img>``, and all
    ``<source>`` children are removed from the tree.

    Pictures without an ``<img>``, and sources without a usable ``srcset``,
    are tolerated: the sources are still removed and any existing ``src``
    is left untouched instead of raising.

    Returns the same ``soup`` object, modified in place.
    """
    for picture in soup.findAll('picture'):
        img = picture.find('img')
        for i, source in enumerate(picture.findAll('source')):
            if i == 0 and img is not None:
                # srcset is "url [descriptor], url [descriptor], ...", so
                # the first whitespace-separated token is the first URL.
                tokens = (source.get('srcset') or '').split()
                # A candidate without a descriptor ("a.jpg, b.jpg 2x")
                # leaves the separating comma glued to the URL; strip it.
                url = tokens[0].rstrip(',') if tokens else ''
                if url:
                    img['src'] = url
            source.extract()
    return soup