mirror of
https://github.com/kovidgoyal/calibre.git
synced 2025-07-09 03:04:10 -04:00
Cleanup tags for removal
This commit is contained in:
parent
889f07dcdc
commit
b25718deca
@ -8,13 +8,11 @@ import html5lib
|
||||
from lxml import html
|
||||
from calibre.web.feeds.news import BasicNewsRecipe
|
||||
|
||||
|
||||
def classes(classes):
    """Build a tag-matching spec keyed on the ``class`` attribute.

    *classes* is a whitespace-separated string of class names.  The
    returned dict has the form ``{'attrs': {'class': predicate}}``, where
    ``predicate(value)`` is truthy when the whitespace-separated tokens of
    *value* share at least one name with *classes*, and falsy when *value*
    is empty/``None`` or nothing matches.
    """
    # Split on any run of whitespace, the same way the attribute value is
    # tokenised below, so tabs, newlines or repeated spaces in the spec
    # cannot produce tokens that never match.
    q = frozenset(classes.split())
    return dict(
        attrs={'class': lambda x: x and frozenset(x.split()).intersection(q)})
|
||||
|
||||
|
||||
class TimesOnline(BasicNewsRecipe):
|
||||
title = 'The Times & Sunday Times (UK)'
|
||||
__author__ = 'Bobby Steel'
|
||||
@ -87,12 +85,9 @@ class TimesOnline(BasicNewsRecipe):
|
||||
return br
|
||||
|
||||
remove_tags = [
|
||||
{'attrs': {'class': lambda x: x and 'is-hidden' in x}},
|
||||
{'attrs': {'class': lambda x: x and 'Toolbar' in x}},
|
||||
{'attrs': {'class': lambda x: x and 'Tooltip' in x}},
|
||||
{'attrs': {'class': lambda x: x and 'Topics' in x}},
|
||||
classes('is-hidden Toolbar Tooltip Topics Comments u-hide RelatedLinks ArticlePager Media-caption'),
|
||||
{'name': ['object', 'link', 'iframe', 'base', 'meta', 'script']},
|
||||
{'attrs': {'class': ['u-hide', 'RelatedLinks', 'Comments Article-container','ArticlePager','Media-caption']}}]
|
||||
]
|
||||
|
||||
remove_attributes = ['lang']
|
||||
keep_only_tags = [{
|
||||
|
Loading…
x
Reference in New Issue
Block a user