mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-06-23 15:30:45 -04:00
Update The Independent UK
This commit is contained in:
parent
924acd1d0c
commit
66be8fe65d
@ -5,6 +5,12 @@ from calibre.web.feeds.recipes import BasicNewsRecipe
|
|||||||
from calibre.ebooks.BeautifulSoup import Tag
|
from calibre.ebooks.BeautifulSoup import Tag
|
||||||
|
|
||||||
|
|
||||||
|
def classes(classes):
|
||||||
|
q = frozenset(classes.split(' '))
|
||||||
|
return dict(attrs={
|
||||||
|
'class': lambda x: x and frozenset(x.split()).intersection(q)})
|
||||||
|
|
||||||
|
|
||||||
class TheIndependentNew(BasicNewsRecipe):
|
class TheIndependentNew(BasicNewsRecipe):
|
||||||
|
|
||||||
title = u'The Independent'
|
title = u'The Independent'
|
||||||
@ -28,15 +34,11 @@ class TheIndependentNew(BasicNewsRecipe):
|
|||||||
compress_news_images = True
|
compress_news_images = True
|
||||||
|
|
||||||
keep_only_tags = [
|
keep_only_tags = [
|
||||||
dict(itemprop=['articleBody', 'headline', 'contentUrl']),
|
classes('headline sub-headline breadcrumb author publish-date hero-image body-content'),
|
||||||
dict(attrs={'class': ['intro', 'author']}),
|
|
||||||
]
|
]
|
||||||
remove_tags = [
|
remove_tags = [
|
||||||
dict(attrs={'class': lambda x: x and 'show-all' in x.split()}),
|
classes('inline-related inline-readmore ad-wrapper icon-gallery i-gallery')
|
||||||
dict(attrs={'class': lambda x: x and 'context-sdl_editor_representation' in x.split()}),
|
|
||||||
dict(attrs={'data-scald-gallery': True}),
|
|
||||||
]
|
]
|
||||||
|
|
||||||
remove_attributes = ['style']
|
remove_attributes = ['style']
|
||||||
|
|
||||||
def get_browser(self, *a, **kw):
|
def get_browser(self, *a, **kw):
|
||||||
@ -51,6 +53,10 @@ class TheIndependentNew(BasicNewsRecipe):
|
|||||||
return br
|
return br
|
||||||
|
|
||||||
def preprocess_html(self, soup):
|
def preprocess_html(self, soup):
|
||||||
|
for img in soup.findAll('amp-img'):
|
||||||
|
img.name = 'img'
|
||||||
|
img['srcset'] = ''
|
||||||
|
|
||||||
for div in soup.findAll(attrs={'class': 'full-gallery'}):
|
for div in soup.findAll(attrs={'class': 'full-gallery'}):
|
||||||
imgs = {}
|
imgs = {}
|
||||||
for li in div.findAll('li', attrs={'data-gallery-item': True, 'data-original': True}):
|
for li in div.findAll('li', attrs={'data-gallery-item': True, 'data-original': True}):
|
||||||
|
Loading…
x
Reference in New Issue
Block a user