mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-09-29 15:31:08 -04:00
Fixes #1906178 [[Enhancement] Fetch news: 30+ McClatchy newspapers](https://bugs.launchpad.net/calibre/+bug/1906178)
73 lines
3.6 KiB
Plaintext
73 lines
3.6 KiB
Plaintext
|
|
__license__ = 'GPL v3'
|
|
__copyright__ = '2009-2010, Darko Miletic <darko.miletic at gmail.com>'
|
|
'''
|
|
miamiherald.com
|
|
'''
|
|
|
|
from calibre.web.feeds.news import BasicNewsRecipe
|
|
|
|
|
|
def classes(classes):
    """Return a tag specification matching any of the given CSS classes.

    ``classes`` is a string of class names separated by single spaces.
    The result is a dict with a single ``attrs`` key whose ``'class'``
    entry is a predicate. The predicate takes an element's ``class``
    attribute value (a string, or a falsy value when absent) and returns:

    * the falsy value itself when the attribute is missing or empty;
    * otherwise the frozenset of class names shared between the element
      and ``classes`` (empty, hence falsy, when nothing matches).
    """
    # Build the lookup set once so every predicate call is a cheap
    # set intersection.
    q = frozenset(classes.split(' '))
    return dict(attrs={
        'class': lambda x: x and frozenset(x.split()).intersection(q)})
|
|
|
|
|
|
class TheMiamiHerald(BasicNewsRecipe):
    """News recipe that downloads The Miami Herald from its RSS feeds."""

    # Recipe metadata.
    title = 'The Miami Herald'
    __author__ = 'Kovid Goyal'
    description = "Miami-Dade and Broward's source for the latest breaking local news on sports, weather, business, jobs, real estate, shopping, health, travel, entertainment, & more."  # noqa
    oldest_article = 1
    max_articles_per_feed = 100
    publisher = u'The Miami Herald'
    language = 'en'

    # Download / clean-up behaviour.
    no_stylesheets = True
    use_embedded_content = False
    encoding = 'utf-8'
    remove_javascript = True

    # Metadata forwarded to the conversion step.
    conversion_options = {
        'comment': description, 'publisher': publisher, 'language': language
    }

    # Keep only the article body and drop the social-sharing widgets in it.
    keep_only_tags = [
        classes('story-body')
    ]
    remove_tags = [
        classes('social-network-macro social-media')
    ]

    # (section title, feed URL) pairs.
    feeds = [
        (u'News', u'https://www.miamiherald.com/news/?widgetName=rssfeed&widgetContentId=712015&getXmlFeed=true'),
        (u'Miami-Dade', u'https://www.miamiherald.com/news/local/community/miami-dade/?widgetName=rssfeed&widgetContentId=712015&getXmlFeed=true'),
        (u'Broward', u'https://www.miamiherald.com/news/local/community/broward/?widgetName=rssfeed&widgetContentId=712015&getXmlFeed=true'),
        (u'Florida Keys', u'https://www.miamiherald.com/news/local/community/florida-keys/?widgetName=rssfeed&widgetContentId=712015&getXmlFeed=true'),
        (u'Florida', u'https://www.miamiherald.com/news/state/florida/?widgetName=rssfeed&widgetContentId=712015&getXmlFeed=true'),
        (u'National', u'https://www.miamiherald.com/news/nation-world/national/?widgetName=rssfeed&widgetContentId=712015&getXmlFeed=true'),
        (u'World', u'https://www.miamiherald.com/news/nation-world/world/?widgetName=rssfeed&widgetContentId=712015&getXmlFeed=true'),
        (u'Americas', u'https://www.miamiherald.com/news/nation-world/world/americas/?widgetName=rssfeed&widgetContentId=712015&getXmlFeed=true'),
        (u'Cuba', u'https://www.miamiherald.com/news/nation-world/world/americas/cuba/?widgetName=rssfeed&widgetContentId=712015&getXmlFeed=true'),
        (u'Haiti', u'https://www.miamiherald.com/news/nation-world/world/americas/haiti/?widgetName=rssfeed&widgetContentId=712015&getXmlFeed=true'),
        (u'Politics', u'https://www.miamiherald.com/news/politics-government/?widgetName=rssfeed&widgetContentId=712015&getXmlFeed=true'),
        (u'Education', u'https://www.miamiherald.com/news/local/education/?widgetName=rssfeed&widgetContentId=712015&getXmlFeed=true'),
        (u'Environment', u'https://www.miamiherald.com/news/local/environment/?widgetName=rssfeed&widgetContentId=712015&getXmlFeed=true'),
        (u'Sports', u'https://www.miamiherald.com/sports/?widgetName=rssfeed&widgetContentId=712015&getXmlFeed=true'),
    ]

    def get_browser(self, *a, **kw):
        """Return the base recipe's browser with the User-Agent header removed.

        All positional and keyword arguments are passed through unchanged
        to ``BasicNewsRecipe.get_browser``.
        """
        # McClatchy servers don't like the user-agent header, they hang
        # forever when it is present
        br = BasicNewsRecipe.get_browser(self, *a, **kw)
        br.addheaders = [x for x in br.addheaders if x[0].lower() != 'user-agent']
        return br

    def preprocess_html(self, soup):
        """Collapse each ``<picture>`` element down to a plain ``<img>``.

        For every ``<picture>``, the first URL listed in the first
        ``<source>`` element's ``srcset`` is copied into the ``src`` of the
        picture's ``<img>``; every ``<source>`` is then removed. Pictures
        lacking an ``<img>``, or whose first ``<source>`` has a missing or
        empty ``srcset``, keep their ``<img>`` (if any) untouched but still
        have their ``<source>`` elements removed.

        Returns the same ``soup`` object, modified in place.
        """
        for picture in soup.findAll('picture'):
            img = picture.find('img')
            for i, source in enumerate(picture.findAll('source')):
                if i == 0 and img is not None:
                    # srcset is "url [descriptor], ..."; the first
                    # whitespace-separated token is the first URL.
                    candidates = (source.get('srcset') or '').split()
                    if candidates:
                        img['src'] = candidates[0]
                source.extract()
        return soup
|