mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-08-11 09:13:57 -04:00
Update ProPublica
Fixes #1895457 [Pro Publica news only creates Table of Contents, no articles.](https://bugs.launchpad.net/calibre/+bug/1895457)
This commit is contained in:
parent
4b58b7e550
commit
aa1482d782
@ -2,6 +2,12 @@
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
def classes(classes):
    """Build a keyword-argument dict that matches tags by CSS class name.

    ``classes`` is a whitespace-separated string of class names. The result
    is a dict with a single ``attrs`` key, so it can be used directly as a
    tag specification or unpacked into a ``findAll`` call, e.g.
    ``soup.findAll('img', **classes('lazyload'))``.

    The ``class`` entry is a predicate over a tag's ``class`` attribute
    value: it is truthy when that value shares at least one class name with
    ``classes`` and falsy otherwise, including when the attribute is missing
    (``None``) or empty.
    """
    # split() instead of split(' '): tabs, newlines and repeated spaces all
    # separate names, and no empty-string entry ends up in the wanted set.
    q = frozenset(classes.split())
    return dict(attrs={
        'class': lambda x: x and frozenset(x.split()).intersection(q)})
|
||||
|
||||
|
||||
class ProPublicaRecipe(BasicNewsRecipe):
|
||||
__license__ = 'GPL v3'
|
||||
__author__ = 'kwetal'
|
||||
@ -21,49 +27,25 @@ class ProPublicaRecipe(BasicNewsRecipe):
|
||||
no_stylesheets = True
|
||||
remove_javascript = True
|
||||
|
||||
keep_only_tags = []
|
||||
keep_only_tags.append(dict(name='div', attrs={'class': 'article'}))
|
||||
keep_only_tags = [
|
||||
classes('title-wrapper content-wrapper article-header lead-art article-body')
|
||||
]
|
||||
remove_tags = [
|
||||
classes('email-signup story-tools newsletter topics-list')
|
||||
]
|
||||
remove_tags_after = classes('article-body')
|
||||
|
||||
remove_tags = []
|
||||
remove_tags.append(dict(name='div', attrs={'id': 'rollups'}))
|
||||
remove_tags.append(dict(name='div', attrs={'class': 'follow_info'}))
|
||||
remove_tags.append(dict(name='ul', attrs={'class': 'long-tools-top'}))
|
||||
remove_tags.append(dict(name='ul', attrs={'id': 'share-box'}))
|
||||
remove_tags.append(dict(name='div', attrs={'class': 'tags'}))
|
||||
remove_tags.append(dict(name='ul', attrs={'class': 'long-tools'}))
|
||||
remove_tags.append(dict(name='ul', attrs={'id': 'share-box2'}))
|
||||
remove_tags.append(dict(name='p', attrs={'id': 'original-url'}))
|
||||
feeds = [
|
||||
(u'Top Stories', u'http://feeds.propublica.org/propublica/main'),
|
||||
]
|
||||
|
||||
feeds = []
|
||||
feeds.append(
|
||||
(u'Top Stories', u'http://feeds.propublica.org/propublica/main'))
|
||||
feeds.append(
|
||||
(u'Stimulus', u'http://feeds.propublica.org/propublica/watchdog/stimulus'))
|
||||
feeds.append(
|
||||
(u'Bailout', u'http://feeds.propublica.org/propublica/watchdog/bailout'))
|
||||
feeds.append(
|
||||
(u'Business', u'http://feeds.propublica.org/propublica/business-money'))
|
||||
feeds.append(
|
||||
(u'Justice', u'http://feeds.propublica.org/propublica/justice-law'))
|
||||
feeds.append((u'Energy & Environment',
|
||||
u'http://feeds.propublica.org/propublica/energy-environment'))
|
||||
feeds.append((u'Government & Politics',
|
||||
u'http://feeds.propublica.org/propublica/government-politics'))
|
||||
feeds.append(
|
||||
(u'Health & Science', u'http://feeds.propublica.org/propublica/health-science'))
|
||||
feeds.append((u'Media & Technology',
|
||||
u'http://feeds.propublica.org/propublica/media-technology'))
|
||||
feeds.append((u'National Security',
|
||||
u'http://feeds.propublica.org/propublica/national-security'))
|
||||
|
||||
conversion_options = {'comments': description, 'tags': category, 'language': 'en',
|
||||
'publisher': publisher}
|
||||
|
||||
extra_css = '''
|
||||
body{font-family:verdana,arial,helvetica,geneva,sans-serif;}
|
||||
img {float: left; margin-right: 0.5em;}
|
||||
h1 {text-align: left;}
|
||||
a, a[href] {text-decoration: none; color: blue;}
|
||||
div.cat {font-size: x-small; color: #666666; margin-bottom: 0.1em;}
|
||||
div.info {font-size: small; color: #696969;}
|
||||
'''
|
||||
def preprocess_html(self, soup):
    """Give lazily loaded and ``srcset``-only images a usable ``src``.

    For every ``<img>`` carrying the ``lazyload`` class that sits inside a
    ``<picture>``, the first URL of the first ``<source>`` element's
    ``data-srcset`` attribute is copied into the image's ``src``. After
    that, every ``<img>`` that has a ``srcset`` attribute gets the first
    URL of that ``srcset`` as its ``src``.

    Images whose candidate list is missing or empty are left untouched
    instead of raising. Returns the same ``soup`` object, modified in place.
    """
    def first_candidate(srcset):
        # A srcset value is a comma-separated list of "URL [descriptor]"
        # entries. The first whitespace-delimited token is the first URL,
        # possibly with a trailing comma when no descriptor follows it.
        tokens = (srcset or '').split()
        return tokens[0].rstrip(',') if tokens else None

    for img in soup.findAll('img', **classes('lazyload')):
        picture = img.findParent('picture')
        if picture is None:
            continue
        source = picture.find('source')
        if source is None:
            continue
        # .get() rather than indexing: a <source> without data-srcset must
        # not abort processing of the whole article with a KeyError.
        url = first_candidate(source.get('data-srcset'))
        if url:
            img['src'] = url
    for img in soup.findAll('img', srcset=True):
        # srcset=True matches on attribute presence, so the value may still
        # be empty; only overwrite src when a URL is actually available.
        url = first_candidate(img['srcset'])
        if url:
            img['src'] = url
    return soup
|
||||
|
Loading…
x
Reference in New Issue
Block a user